diff --git a/.deepsource.toml b/.deepsource.toml deleted file mode 100644 index e68653328f..0000000000 --- a/.deepsource.toml +++ /dev/null @@ -1,7 +0,0 @@ -version = 1 - -test_patterns = ["zarr/tests/test_*.py"] - -[[analyzers]] -name = "python" -enabled = true diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 7da1f9608e..0000000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 100 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 53bf4633f0..9e0316032f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,4 @@ # lint codebase with black and ruff 4e348d6b80c96da461fd866576c971b8a659ba15 +# migrate from black to ruff format +22cea005629913208a85799372e045f353744add diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000000..8fb235d704 --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes index b6115e6e49..57eb8a8807 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ *.py linguist-language=python *.ipynb linguist-documentation +.git_archival.txt export-subst diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 7b0c4dcfc4..705cd31cb5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,5 +1,5 @@ -name: 🐛 File a bug report -description: X's behavior is deviating from its documented behavior. +name: Bug Report +description: Report incorrect behaviour in the library. labels: ["bug"] body: - type: markdown diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9ceaab2ae7..edbd88eaf2 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,11 +1,11 @@ blank_issues_enabled: true contact_links: - - name: ✨ Propose a new major feature + - name: Propose a new major feature url: https://github.com/zarr-developers/zarr-specs about: A new major feature should be discussed in the Zarr specifications repository. - - name: ❓ Discuss something on Zulip + - name: Discuss something on ZulipChat url: https://ossci.zulipchat.com/ - about: For questions like "How do I do X with Zarr?", you can move to our Zulip Chat. - - name: ❓ Discuss something on GitHub Discussions + about: For questions like "How do I do X with Zarr?", you can move to our ZulipChat. + - name: Discuss something on GitHub Discussions url: https://github.com/zarr-developers/zarr-python/discussions about: For questions like "How do I do X with Zarr?", you can move to GitHub Discussions. diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml new file mode 100644 index 0000000000..c240666d1f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.yml @@ -0,0 +1,18 @@ +name: Documentation Improvement +description: Report missing or wrong documentation. Alternatively, you can just open a pull request with the suggested change. +title: "DOC: " +labels: [documentation, help wanted] + +body: +- type: textarea + attributes: + label: Describe the issue linked to the documentation + description: > + Please provide a description of what documentation you believe needs to be fixed/improved. 
+ validations: + required: true +- type: textarea + attributes: + label: Suggested fix for documentation + description: > + Please explain the suggested fix and why it's better than the existing documentation. diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5a0befe9b5..a437a5c269 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,28 @@ --- version: 2 updates: + # Updates for v3 branch (the default branch) - package-ecosystem: "pip" directory: "/" + schedule: + interval: "daily" + groups: + actions: + patterns: + - "*" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + groups: + actions: + patterns: + - "*" + + # Same updates, but for main branch + - package-ecosystem: "pip" + directory: "/" + target-branch: "main" schedule: interval: "daily" groups: @@ -11,5 +31,10 @@ updates: - "*" - package-ecosystem: "github-actions" directory: "/" + target-branch: "main" schedule: interval: "weekly" + groups: + actions: + patterns: + - "*" diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml new file mode 100644 index 0000000000..e0be897532 --- /dev/null +++ b/.github/workflows/gpu_test.yml @@ -0,0 +1,66 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: GPU Test V3 + +on: + push: + branches: [ v3 ] + pull_request: + branches: [ v3 ] + workflow_dispatch: + +env: + LD_LIBRARY_PATH: /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} + + runs-on: gpu-runner + strategy: + matrix: + python-version: ['3.11'] + numpy-version: ['2.0'] + dependency-set: ["minimal"] + + steps: + - uses: actions/checkout@v4 + # - name: cuda-toolkit + # uses: Jimver/cuda-toolkit@v0.2.16 + # id: cuda-toolkit + # with: + # cuda: '12.4.1' + - name: Set up CUDA + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get -y install cuda-toolkit-12-6 + echo "/usr/local/cuda/bin" >> $GITHUB_PATH + - name: GPU check + run: | + nvidia-smi + echo $PATH + echo $LD_LIBRARY_PATH + nvcc -V + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch and CuPy + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} + hatch env run -e gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env + - name: Run Tests + run: | + hatch env run --env gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml new file mode 100644 index 0000000000..c5a239c274 --- /dev/null +++ b/.github/workflows/hypothesis.yaml @@ -0,0 +1,84 @@ +name: Slow Hypothesis CI +on: + push: + branches: + - "main" + - "v3" + pull_request: + branches: + - "main" + - "v3" + types: [opened, reopened, synchronize, 
labeled] + schedule: + - cron: "0 0 * * *" # Daily “At 00:00” UTC + workflow_dispatch: # allows you to trigger manually + +env: + FORCE_COLOR: 3 + +jobs: + + hypothesis: + name: Slow Hypothesis Tests + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -l {0} + + strategy: + matrix: + python-version: ['3.11'] + numpy-version: ['1.26'] + dependency-set: ["optional"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} + hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env + # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache + - name: Restore cached hypothesis directory + id: restore-hypothesis-cache + uses: actions/cache/restore@v4 + with: + path: .hypothesis/ + key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} + restore-keys: | + cache-hypothesis- + + - name: Run slow Hypothesis tests + if: success() + id: status + run: | + hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis + + # explicitly save the cache so it gets updated, also do this even if it fails. + - name: Save cached hypothesis directory + id: save-hypothesis-cache + if: always() && steps.status.outcome != 'skipped' + uses: actions/cache/save@v4 + with: + path: .hypothesis/ + key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} + + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'zarr-developers' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: output-${{ matrix.python-version }}-log.jsonl + issue-title: "Nightly Hypothesis tests failed" + issue-label: "topic-hypothesis" diff --git a/.github/workflows/issue-metrics.yml b/.github/workflows/issue-metrics.yml new file mode 100644 index 0000000000..34bda59ff6 --- /dev/null +++ b/.github/workflows/issue-metrics.yml @@ -0,0 +1,42 @@ +name: Monthly issue metrics +on: + workflow_dispatch: + schedule: + - cron: '3 2 1 * *' + +permissions: + contents: read + +jobs: + build: + name: issue metrics + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: read + steps: + - name: Get dates for last month + shell: bash + run: | + # Calculate the first day of the previous month + first_day=$(date -d "last month" +%Y-%m-01) + + # Calculate the last day of the previous month + last_day=$(date -d "$first_day +1 month -1 day" +%Y-%m-%d) + + #Set an environment variable with the date range + echo "$first_day..$last_day" + echo "last_month=$first_day..$last_day" >> "$GITHUB_ENV" + + - name: Run issue-metrics tool + uses: github/issue-metrics@v3 + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SEARCH_QUERY: 'repo:zarr-developers/zarr-python is:issue created:${{ env.last_month }} -reason:"not planned"' + + - name: Create issue + uses: peter-evans/create-issue-from-file@v5 + with: + title: Monthly issue metrics report + token: ${{ secrets.GITHUB_TOKEN }} + content-filepath: ./issue_metrics.md diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml deleted file mode 100644 index 
b5b2f48d62..0000000000 --- a/.github/workflows/minimal.yml +++ /dev/null @@ -1,41 +0,0 @@ -# This workflow simulates the environment found during a conda-forge build -# and makes sure that Zarr can run without fsspec and other additional libraries. -name: Minimal installation - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - minimum_build: - - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - channels: conda-forge - environment-file: environment.yml - activate-environment: minimal - - name: Tests - shell: "bash -l {0}" - env: - ZARR_V3_EXPERIMENTAL_API: 1 - ZARR_V3_SHARDING: 1 - run: | - conda activate minimal - python -m pip install . - pytest -svx --timeout=300 - - name: Fixture generation - shell: "bash -l {0}" - env: - ZARR_V3_EXPERIMENTAL_API: 1 - ZARR_V3_SHARDING: 1 - run: | - conda activate minimal - rm -rf fixture/ - pytest -svx --timeout=300 zarr/tests/test_dim_separator.py zarr/tests/test_storage.py - # This simulates fixture-less tests in conda and debian packaging diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index b1be7e425d..0000000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,82 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Linux Testing - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.10', '3.11', '3.12'] - numpy_version: ['>=2.1', '==1.24.*'] - exclude: - - python-version: '3.12' - numpy_version: '==1.24.*' - services: - redis: - image: redis - # Set health checks to wait until redis has started - options: >- - --health-cmd "redis-cli ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 6379:6379 - mongodb: - image: mongo:4.4.11 - ports: - - 27017:27017 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - channels: conda-forge - python-version: ${{ matrix.python-version }} - env: - ACTIONS_ALLOW_UNSECURE_COMMANDS: true - # Runs a set of commands using the runners shell - - name: Create Conda environment with the rights deps - shell: "bash -l {0}" - run: | - conda create -n zarr-env python==${{matrix.python-version}} bsddb3 pip nodejs - conda activate zarr-env - npm install -g azurite - - name: Install dependencies - shell: "bash -l {0}" - run: | - conda activate zarr-env - python -m pip install --upgrade pip - python -m pip install -r requirements_dev_minimal.txt numpy${{matrix.numpy_version}} -r requirements_dev_optional.txt line_profiler pymongo redis - python -m pip install -e . 
- python -m pip freeze - - name: Tests - shell: "bash -l {0}" - env: - COVERAGE_FILE: .coverage.${{matrix.python-version}}.${{matrix.numpy_version}} - ZARR_TEST_ABS: 1 - ZARR_TEST_MONGO: 1 - ZARR_TEST_REDIS: 1 - ZARR_V3_EXPERIMENTAL_API: 1 - ZARR_V3_SHARDING: 1 - run: | - conda activate zarr-env - mkdir ~/blob_emulator - azurite -l ~/blob_emulator --debug debug.log 2>&1 > stdouterr.log & - pytest --cov=zarr --cov-config=pyproject.toml --doctest-plus --cov-report xml --cov=./ --timeout=300 - - uses: codecov/codecov-action@v4 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - with: - verbose: true # optional (default = false) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 26b669abea..bab53958dc 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -23,19 +23,10 @@ jobs: - name: Install PyBuild run: | - python -m pip install 'build!=0.1' setuptools-scm - + python -m pip install --upgrade pip + pip install hatch - name: Build wheel and sdist - run: | - python -m build - git describe - pwd - if [ -f dist/zarr-0.0.0.tar.gz ]; then - echo "WRONG VERSION NUMBER" - exit 1 - else - echo "All seem good" - fi + run: hatch build - uses: actions/upload-artifact@v4 with: name: releases diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000000..5683b62dff --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,45 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test V3 + +on: + push: + branches: [ v3 ] + pull_request: + branches: [ v3 ] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12'] + numpy-version: ['1.25', '1.26', '2.0'] + dependency-set: ["minimal", "optional"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} + hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env + - name: Run Tests + run: | + hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml deleted file mode 100644 index 1e22fec6d1..0000000000 --- a/.github/workflows/windows-testing.yml +++ /dev/null @@ -1,60 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Python package - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - windows: - name: Windows Tests - runs-on: "windows-latest" - strategy: - fail-fast: True - matrix: - python-version: ['3.10', '3.11'] - steps: - - uses: 
actions/checkout@v4 - with: - fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - channels: conda-forge - env: - ACTIONS_ALLOW_UNSECURE_COMMANDS: true - - name: Create Conda environment with the rights deps - shell: bash -l {0} - run: | - conda create -n zarr-env python==${{matrix.python-version}} pip nodejs - - name: Install dependencies - shell: bash -l {0} - run: | - conda activate zarr-env - python -m pip install --upgrade pip - python -m pip install -r requirements_dev_numpy.txt -r requirements_dev_minimal.txt -r requirements_dev_optional.txt - python -m pip install . - python -m pip freeze - npm install -g azurite - - name: Run Tests - shell: bash -l {0} - run: | - conda activate zarr-env - mkdir ~/blob_emulator - azurite -l ~/blob_emulator --debug debug.log 2>&1 > stdouterr.log & - pytest -sv --timeout=300 - env: - ZARR_TEST_ABS: 1 - ZARR_V3_EXPERIMENTAL_API: 1 - ZARR_V3_SHARDING: 1 - - name: Conda info - shell: bash -l {0} - run: conda info - - name: Conda list - shell: pwsh - run: conda list diff --git a/.gitignore b/.gitignore index 7de405d8a0..199ab10578 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,7 @@ coverage.xml # Sphinx documentation docs/_build/ -docs/_autoapi/ +docs/_autoapi # PyBuilder target/ @@ -62,8 +62,8 @@ target/ # Jupyter .ipynb_checkpoints/ -# setuptools-scm -zarr/version.py +# VCS versioning +src/zarr/_version.py # emacs *~ @@ -78,5 +78,11 @@ zarr/version.py #doesnotexist #test_sync* data/* +src/fixture/ +fixture/ .DS_Store +tests/.hypothesis +.hypothesis/ + +zarr/version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be57770200..0dd5bd73df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,33 +2,54 @@ ci: autoupdate_commit_msg: "chore: update pre-commit hooks" autofix_commit_msg: "style: pre-commit fixes" autofix_prs: false -default_stages: [commit, push] +default_stages: [pre-commit, pre-push] default_language_version: python: python3 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. 
- rev: 'v0.4.4' + rev: v0.6.9 hooks: - id: ruff - - repo: https://github.com/psf/black - rev: 24.4.2 - hooks: - - id: black + args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell + args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.11.2 hooks: - id: mypy - files: zarr - args: [] + files: src|tests additional_dependencies: + # Package dependencies + - asciitree + - crc32c + - donfig + - fasteners + - numcodecs + - numpy + - typing_extensions + - universal-pathlib + # Tests + - pytest + # Zarr v2 - types-redis - - types-setuptools + - repo: https://github.com/scientific-python/cookie + rev: 2024.08.19 + hooks: + - id: sp-repo-review + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: rst-directive-colons + - id: rst-inline-touching-normal + - repo: https://github.com/numpy/numpydoc + rev: v1.8.0 + hooks: + - id: numpydoc-validation diff --git a/.readthedocs.yaml b/.readthedocs.yaml index d7190b4771..32a3f0e4e1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,21 +1,19 @@ version: 2 build: - os: ubuntu-20.04 + os: ubuntu-22.04 tools: - python: "3.11" + python: "3.12" sphinx: configuration: docs/conf.py fail_on_warning: true +formats: all + python: - install: - - method: pip - path: . - extra_requirements: + install: + - method: pip + path: . + extra_requirements: - docs - -formats: - - htmlzip - - pdf diff --git a/README-v3.md b/README-v3.md new file mode 100644 index 0000000000..8348038e5a --- /dev/null +++ b/README-v3.md @@ -0,0 +1,49 @@ +# V3 Contributor Guide + +A bare-bones guide to contributing to V3. + +Developed for the Feb. 2024 Zarr Sprint. + +## Clone V3 branch + +[Fork](https://github.com/zarr-developers/zarr-python/fork) zarr-python and clone it locally. + +``` +git clone {your remote} +git remote add upstream https://github.com/zarr-developers/zarr-python +git fetch upstream +git checkout --track upstream/v3 +``` + +## Set up your environment + +Zarr uses [hatch](https://hatch.pypa.io/) for its build system. 
+ +``` +mamba install hatch +``` + +or + +``` +pip install hatch +``` + +Then + +``` +hatch env create test +``` + +## Run the Tests + +``` +hatch run test:run +``` + +or + +``` +hatch -e test shell +pytest -v +``` \ No newline at end of file diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 803d54b76b..179520a0e4 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -1,9 +1,9 @@ import sys import timeit +import line_profiler import numpy as np -import line_profiler import zarr from zarr import blosc @@ -16,7 +16,7 @@ a, chunks=1000000, compression="blosc", - compression_opts=dict(cname="lz4", clevel=5, shuffle=2), + compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 2}, ) print(z) diff --git a/docs/Makefile b/docs/Makefile index e6adc1ca8c..fc8fa12915 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -52,6 +52,7 @@ help: .PHONY: clean clean: rm -rf $(BUILDDIR)/* + rm -rf $(BUILDDIR)/../_autoapi .PHONY: html html: diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 4e664a596b..1d32606f9a 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,12 +1,5 @@ @import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;0,900;1,400;1,700;1,900&family=Open+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); -.navbar-brand img { - height: 75px; -} -.navbar-brand { - height: 75px; -} - body { font-family: 'Open Sans', sans-serif; } @@ -115,7 +108,3 @@ html[data-theme=dark] .sd-card .sd-card-footer { html[data-theme=dark] h1 { color: var(--pst-color-primary); } - -html[data-theme=dark] h3 { - color: #0a6774; -} diff --git a/docs/_static/custom.js b/docs/_static/custom.js index dcb584ecd5..52f1cba9e0 100644 --- a/docs/_static/custom.js +++ b/docs/_static/custom.js @@ -6,7 +6,6 @@ "highlights": "getting_started.html#highlights", "contributing": "contributing.html", "projects-using-zarr": "getting_started.html#projects-using-zarr", - "acknowledgments": "acknowledgments.html", "contents": "getting_started.html#contents", "indices-and-tables": "api.html#indices-and-tables" } diff --git a/docs/_static/logo_horizontal.svg b/docs/_static/logo_horizontal.svg new file mode 100644 index 0000000000..90fa4bc3a8 --- /dev/null +++ b/docs/_static/logo_horizontal.svg @@ -0,0 +1,135 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/acknowledgments.rst b/docs/acknowledgments.rst deleted file mode 100644 index 4fce1e8ae4..0000000000 --- a/docs/acknowledgments.rst +++ /dev/null @@ -1,76 +0,0 @@ -Acknowledgments -=============== - -The following people have contributed to the development of Zarr by contributing code, -documentation, code reviews, comments and/or ideas: - -* :user:`Alistair Miles ` -* :user:`Altay Sansal ` -* :user:`Anderson Banihirwe ` -* :user:`Andrew Fulton ` -* :user:`Andrew Thomas ` -* :user:`Anthony Scopatz ` -* :user:`Attila Bergou ` -* :user:`BGCMHou ` -* :user:`Ben Jeffery ` -* :user:`Ben Williams ` -* :user:`Boaz Mohar ` -* :user:`Charles Noyes ` -* :user:`Chris Barnes ` -* :user:`David Baddeley ` -* :user:`Davis Bennett ` -* :user:`Dimitri Papadopoulos Orfanos ` -* :user:`Eduardo Gonzalez ` -* :user:`Elliott Sales de Andrade ` -* :user:`Eric Prestat ` -* :user:`Eric Younkin ` -* :user:`Francesc Alted ` -* :user:`Greggory Lee ` -* :user:`Gregory R. 
Lee ` -* :user:`Ian Hunt-Isaak ` -* :user:`James Bourbeau ` -* :user:`Jan Funke ` -* :user:`Jerome Kelleher ` -* :user:`Joe Hamman ` -* :user:`Joe Jevnik ` -* :user:`John Kirkham ` -* :user:`Josh Moore ` -* :user:`Juan Nunez-Iglesias ` -* :user:`Justin Swaney ` -* :user:`Mads R. B. Kristensen ` -* :user:`Mamy Ratsimbazafy ` -* :user:`Martin Durant ` -* :user:`Matthew Rocklin ` -* :user:`Matthias Bussonnier ` -* :user:`Mattia Almansi ` -* :user:`Noah D Brenowitz ` -* :user:`Oren Watson ` -* :user:`Pavithra Eswaramoorthy ` -* :user:`Poruri Sai Rahul ` -* :user:`Prakhar Goel ` -* :user:`Raphael Dussin ` -* :user:`Ray Bell ` -* :user:`Richard Scott ` -* :user:`Richard Shaw ` -* :user:`Ryan Abernathey ` -* :user:`Ryan Williams ` -* :user:`Saransh Chopra ` -* :user:`Sebastian Grill ` -* :user:`Shikhar Goenka ` -* :user:`Shivank Chaudhary ` -* :user:`Stephan Hoyer ` -* :user:`Stephan Saalfeld ` -* :user:`Tarik Onalan ` -* :user:`Tim Crone ` -* :user:`Tobias Kölling ` -* :user:`Tom Augspurger ` -* :user:`Tom White ` -* :user:`Tommy Tran ` -* :user:`Trevor Manz ` -* :user:`Vincent Schut ` -* :user:`Vyas Ramasubramani ` -* :user:`Zain Patel ` -* :user:`gsakkis` -* :user:`hailiangzhang ` -* :user:`pmav99 ` -* :user:`sbalmer ` diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index e200dd908d..0000000000 --- a/docs/api.rst +++ /dev/null @@ -1,23 +0,0 @@ -API reference -============= - -.. toctree:: - :maxdepth: 3 - - api/creation - api/core - api/hierarchy - api/storage - api/n5 - api/convenience - api/codecs - api/attrs - api/sync - api/v3 - -Indices and tables ------------------- - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/api/attrs.rst b/docs/api/attrs.rst deleted file mode 100644 index f95e63af3a..0000000000 --- a/docs/api/attrs.rst +++ /dev/null @@ -1,16 +0,0 @@ -The Attributes class (``zarr.attrs``) -===================================== -.. module:: zarr.attrs - -.. autoclass:: Attributes - - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: __delitem__ - .. automethod:: __iter__ - .. automethod:: __len__ - .. automethod:: keys - .. automethod:: asdict - .. automethod:: put - .. automethod:: update - .. automethod:: refresh diff --git a/docs/api/codecs.rst b/docs/api/codecs.rst deleted file mode 100644 index b50f747d74..0000000000 --- a/docs/api/codecs.rst +++ /dev/null @@ -1,23 +0,0 @@ -Compressors and filters (``zarr.codecs``) -========================================= -.. module:: zarr.codecs - -This module contains compressor and filter classes for use with Zarr. Please note that this module -is provided for backwards compatibility with previous versions of Zarr. From Zarr version 2.2 -onwards, all codec classes have been moved to a separate package called Numcodecs_. The two -packages (Zarr and Numcodecs_) are designed to be used together. For example, a Numcodecs_ codec -class can be used as a compressor for a Zarr array:: - - >>> import zarr - >>> from numcodecs import Blosc - >>> z = zarr.zeros(1000000, compressor=Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)) - -Codec classes can also be used as filters. See the tutorial section on :ref:`tutorial_filters` -for more information. - -Please note that it is also relatively straightforward to define and register custom codec -classes. See the Numcodecs `codec API `_ and -`codec registry `_ documentation for more -information. - -.. 
_Numcodecs: https://numcodecs.readthedocs.io/ diff --git a/docs/api/convenience.rst b/docs/api/convenience.rst deleted file mode 100644 index a70a90ce7c..0000000000 --- a/docs/api/convenience.rst +++ /dev/null @@ -1,14 +0,0 @@ -Convenience functions (``zarr.convenience``) -============================================ -.. automodule:: zarr.convenience -.. autofunction:: open -.. autofunction:: save -.. autofunction:: load -.. autofunction:: save_array -.. autofunction:: save_group -.. autofunction:: copy -.. autofunction:: copy_all -.. autofunction:: copy_store -.. autofunction:: tree -.. autofunction:: consolidate_metadata -.. autofunction:: open_consolidated diff --git a/docs/api/core.rst b/docs/api/core.rst deleted file mode 100644 index b310460e51..0000000000 --- a/docs/api/core.rst +++ /dev/null @@ -1,5 +0,0 @@ -The Array class (``zarr.core``) -=============================== - -.. automodapi:: zarr.core - :no-heading: diff --git a/docs/api/creation.rst b/docs/api/creation.rst deleted file mode 100644 index 66422c0670..0000000000 --- a/docs/api/creation.rst +++ /dev/null @@ -1,15 +0,0 @@ -Array creation (``zarr.creation``) -================================== -.. module:: zarr.creation -.. autofunction:: create -.. autofunction:: empty -.. autofunction:: zeros -.. autofunction:: ones -.. autofunction:: full -.. autofunction:: array -.. autofunction:: open_array -.. autofunction:: empty_like -.. autofunction:: zeros_like -.. autofunction:: ones_like -.. autofunction:: full_like -.. autofunction:: open_like diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst deleted file mode 100644 index 11a5575144..0000000000 --- a/docs/api/hierarchy.rst +++ /dev/null @@ -1,41 +0,0 @@ -Groups (``zarr.hierarchy``) -=========================== -.. module:: zarr.hierarchy - -.. autofunction:: group -.. autofunction:: open_group - -.. autoclass:: Group - - .. automethod:: __len__ - .. automethod:: __iter__ - .. automethod:: __contains__ - .. automethod:: __getitem__ - .. automethod:: __enter__ - .. automethod:: __exit__ - .. automethod:: group_keys - .. automethod:: groups - .. automethod:: array_keys - .. automethod:: arrays - .. automethod:: visit - .. automethod:: visitkeys - .. automethod:: visitvalues - .. automethod:: visititems - .. automethod:: tree - .. automethod:: create_group - .. automethod:: require_group - .. automethod:: create_groups - .. automethod:: require_groups - .. automethod:: create_dataset - .. automethod:: require_dataset - .. automethod:: create - .. automethod:: empty - .. automethod:: zeros - .. automethod:: ones - .. automethod:: full - .. automethod:: array - .. automethod:: empty_like - .. automethod:: zeros_like - .. automethod:: ones_like - .. automethod:: full_like - .. automethod:: move diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 0000000000..8735180cd9 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,7 @@ +API Reference +============= + +.. toctree:: + :maxdepth: 1 + + ../_autoapi/zarr/index diff --git a/docs/api/n5.rst b/docs/api/n5.rst deleted file mode 100644 index b6a8d8c61e..0000000000 --- a/docs/api/n5.rst +++ /dev/null @@ -1,5 +0,0 @@ -N5 (``zarr.n5``) -================ -.. automodule:: zarr.n5 - -.. autoclass:: N5Store diff --git a/docs/api/storage.rst b/docs/api/storage.rst deleted file mode 100644 index 4321837449..0000000000 --- a/docs/api/storage.rst +++ /dev/null @@ -1,50 +0,0 @@ -Storage (``zarr.storage``) -========================== -.. automodule:: zarr.storage - -.. autoclass:: MemoryStore -.. 
autoclass:: DirectoryStore -.. autoclass:: TempStore -.. autoclass:: NestedDirectoryStore -.. autoclass:: ZipStore - - .. automethod:: close - .. automethod:: flush - -.. autoclass:: DBMStore - - .. automethod:: close - .. automethod:: flush - -.. autoclass:: LMDBStore - - .. automethod:: close - .. automethod:: flush - -.. autoclass:: SQLiteStore - - .. automethod:: close - -.. autoclass:: MongoDBStore -.. autoclass:: RedisStore -.. autoclass:: LRUStoreCache - - .. automethod:: invalidate - .. automethod:: invalidate_values - .. automethod:: invalidate_keys - -.. autoclass:: ABSStore - -.. autoclass:: FSStore - -.. autoclass:: ConsolidatedMetadataStore - -.. autofunction:: init_array -.. autofunction:: init_group -.. autofunction:: contains_array -.. autofunction:: contains_group -.. autofunction:: listdir -.. autofunction:: rmdir -.. autofunction:: getsize -.. autofunction:: rename -.. autofunction:: migrate_1to2 diff --git a/docs/api/sync.rst b/docs/api/sync.rst deleted file mode 100644 index a139805e78..0000000000 --- a/docs/api/sync.rst +++ /dev/null @@ -1,6 +0,0 @@ -Synchronization (``zarr.sync``) -=============================== -.. module:: zarr.sync - -.. autoclass:: ThreadSynchronizer -.. autoclass:: ProcessSynchronizer diff --git a/docs/api/v3.rst b/docs/api/v3.rst deleted file mode 100644 index 3503e3fe81..0000000000 --- a/docs/api/v3.rst +++ /dev/null @@ -1,76 +0,0 @@ -V3 Specification Implementation(``zarr._storage.v3``) -===================================================== - -This module contains an experimental implementation of the `Zarr V3 Specification `_. - -.. warning:: - The experimental v3 implementation included in Zarr Python >2.12,<3 is not aligned with the final - V3 specification. This version is deprecated and will be removed in Zarr Python 3.0 in favor of a - spec compliant version. - -The new ``zarr._store.v3`` package has the necessary classes and functions for evaluating Zarr V3. -Since the design is not finalised, the classes and functions are not automatically imported into -the regular Zarr namespace. - -Code snippet for creating Zarr V3 arrays:: - - >>> import zarr - >>> z = zarr.create((10000, 10000), - >>> chunks=(100, 100), - >>> dtype='f8', - >>> compressor='default', - >>> path='path-where-you-want-zarr-v3-array', - >>> zarr_version=3) - -Further, you can use `z.info` to see details about the array you just created:: - - >>> z.info - Name : path-where-you-want-zarr-v3-array - Type : zarr.core.Array - Data type : float64 - Shape : (10000, 10000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : zarr._storage.v3.KVStoreV3 - No. bytes : 800000000 (762.9M) - No. bytes stored : 557 - Storage ratio : 1436265.7 - Chunks initialized : 0/10000 - -You can also check ``Store type`` here (which indicates Zarr V3). - -.. module:: zarr._storage.v3 - -.. autoclass:: RmdirV3 -.. autoclass:: KVStoreV3 -.. autoclass:: FSStoreV3 -.. autoclass:: MemoryStoreV3 -.. autoclass:: DirectoryStoreV3 -.. autoclass:: ZipStoreV3 -.. autoclass:: RedisStoreV3 -.. autoclass:: MongoDBStoreV3 -.. autoclass:: DBMStoreV3 -.. autoclass:: LMDBStoreV3 -.. autoclass:: SQLiteStoreV3 -.. autoclass:: LRUStoreCacheV3 -.. autoclass:: ConsolidatedMetadataStoreV3 - -In v3 `storage transformers `_ -can be set via ``zarr.create(…, storage_transformers=[…])``. -The experimental sharding storage transformer can be tested by setting -the environment variable ``ZARR_V3_SHARDING=1``. 
Data written with this flag -enabled should be expected to become stale until -`ZEP 2 `_ is approved -and fully implemented. - -.. module:: zarr._storage.v3_storage_transformers - -.. autoclass:: ShardingStorageTransformer - -The abstract base class for storage transformers is - -.. module:: zarr._storage.store - -.. autoclass:: StorageTransformer diff --git a/docs/conf.py b/docs/conf.py index 136fcf32d6..0a328ac25f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,10 +15,14 @@ import os import sys +from typing import Any + +import sphinx.application from importlib.metadata import version as get_version import zarr +import sphinx # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -39,19 +43,22 @@ "sphinx.ext.autosummary", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", - "sphinx_automodapi.automodapi", + 'autoapi.extension', "numpydoc", "sphinx_issues", "sphinx_copybutton", "sphinx_design", ] -numpydoc_show_class_members = False -numpydoc_class_members_toctree = False issues_github_path = "zarr-developers/zarr-python" -automodapi_inheritance_diagram = False -automodapi_toctreedirnm = "_autoapi" +autoapi_dirs = ['../src/zarr'] +autoapi_add_toctree_entry = False +autoapi_generate_api_docs = True +autoapi_member_order = "groupwise" +autoapi_root = "_autoapi" +autoapi_keep_files = True +autoapi_options = [ 'members', 'undoc-members', 'show-inheritance', 'show-module-summary', 'imported-members', ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -159,11 +166,10 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = "_static/logo1.png" +html_logo = "_static/logo_horizontal.svg" -# Add custom css -def setup(app): +def setup(app: sphinx.application.Sphinx) -> None: app.add_css_file("custom.css") @@ -245,6 +251,8 @@ def setup(app): # Output file base name for HTML help builder. htmlhelp_basename = "zarrdoc" +maximum_signature_line_length = 80 + # -- Options for LaTeX output --------------------------------------------- latex_elements = { @@ -330,7 +338,7 @@ def setup(app): # use in refs e.g: # :ref:`comparison manual ` intersphinx_mapping = { - "python": ("https://docs.python.org/", None), + "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), "numcodecs": ("https://numcodecs.readthedocs.io/en/stable/", None), } diff --git a/docs/contributing.rst b/docs/contributing.rst index 91606b7276..a65b3d104d 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -335,7 +335,7 @@ of the storage specification that is currently implemented is stored under the Note that the Zarr test suite includes a data fixture and tests to try and ensure that data format compatibility is not accidentally broken. See the -:func:`test_format_compatibility` function in the :mod:`zarr.tests.test_storage` module +:func:`test_format_compatibility` function in the :mod:`tests.test_storage` module for details. When to make a release diff --git a/docs/guide/consolidated_metadata.rst b/docs/guide/consolidated_metadata.rst new file mode 100644 index 0000000000..5010d32481 --- /dev/null +++ b/docs/guide/consolidated_metadata.rst @@ -0,0 +1,74 @@ +Consolidated Metadata +===================== + +Zarr-Python implements the `Consolidated Metadata_` extension to the Zarr Spec. 
+Consolidated metadata can reduce the time needed to load the metadata for an +entire hierarchy, especially when the metadata is being served over a network. +Consolidated metadata essentially stores all the metadata for a hierarchy in the +metadata of the root Group. + +Usage +----- + +If consolidated metadata is present in a Zarr Group's metadata then it is used +by default. The initial read to open the group will need to communicate with +the store (reading from a file for a :class:`zarr.storage.LocalStore`, making a +network request for a :class:`zarr.storage.RemoteStore`). After that, any subsequent +metadata reads to get child Group or Array nodes will *not* require reads from the store. + +In Python, the consolidated metadata is available on the ``.consolidated_metadata`` +attribute of the ``GroupMetadata`` object. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.MemoryStore({}, mode="w") + >>> group = zarr.open_group(store=store) + >>> group.create_array(shape=(1,), name="a") + >>> group.create_array(shape=(2, 2), name="b") + >>> group.create_array(shape=(3, 3, 3), name="c") + >>> zarr.consolidate_metadata(store) + +If we open that group, the Group's metadata has a :class:`zarr.ConsolidatedMetadata` +that can be used. + +.. code-block:: python + + >>> consolidated = zarr.open_group(store=store) + >>> consolidated.metadata.consolidated_metadata.metadata + {'b': ArrayV3Metadata(shape=(2, 2), fill_value=np.float64(0.0), ...), + 'a': ArrayV3Metadata(shape=(1,), fill_value=np.float64(0.0), ...), + 'c': ArrayV3Metadata(shape=(3, 3, 3), fill_value=np.float64(0.0), ...)} + +Operations on the group to get children automatically use the consolidated metadata. + +.. code-block:: python + + >>> consolidated["a"] # no read / HTTP request to the Store is required + + +With nested groups, the consolidated metadata is available on the children, recursively. + +.. code-block:: python + + >>> child = group.create_group("child", attributes={"kind": "child"}) + >>> grandchild = child.create_group("child", attributes={"kind": "grandchild"}) + >>> consolidated = zarr.consolidate_metadata(store) + + >>> consolidated["child"].metadata.consolidated_metadata + ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, )}, ...) + +Synchronization and Concurrency +------------------------------- + +Consolidated metadata is intended for read-heavy use cases on slowly changing +hierarchies. For hierarchies where new nodes are constantly being added, +removed, or modified, consolidated metadata may not be desirable. + +1. It will add some overhead to each update operation, since the metadata + would need to be re-consolidated to keep it in sync with the store. +2. Readers using consolidated metadata will regularly see a "past" version + of the metadata, at the time they read the root node with its consolidated + metadata. + +.. _Consolidated Metadata: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#consolidated-metadata \ No newline at end of file diff --git a/docs/guide/index.rst b/docs/guide/index.rst new file mode 100644 index 0000000000..f841dbb85d --- /dev/null +++ b/docs/guide/index.rst @@ -0,0 +1,8 @@ +Guide +===== + +..
toctree:: + :maxdepth: 1 + + storage + consolidated_metadata diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst new file mode 100644 index 0000000000..dfda553c43 --- /dev/null +++ b/docs/guide/storage.rst @@ -0,0 +1,101 @@ +Storage +======= + +Zarr-Python supports multiple storage backends, including local file systems, +Zip files, remote stores via ``fsspec`` (S3, HTTP, etc.), and in-memory stores. In +Zarr-Python 3, stores must implement the abstract store API from +:class:`zarr.abc.store.Store`. + +.. note:: + Unlike Zarr-Python 2, where the store interface was built around a generic ``MutableMapping`` + API, Zarr-Python 3 uses a custom store API built on Python's AsyncIO library. + +Implicit Store Creation +----------------------- + +In most cases, it is not required to create a ``Store`` object explicitly. Passing a string +to Zarr's top level API will result in the store being created automatically. + +.. code-block:: python + + >>> import zarr + >>> zarr.open("data/foo/bar", mode="r") # implicitly creates a LocalStore + + >>> zarr.open("s3://foo/bar", mode="r") # implicitly creates a RemoteStore + + >>> data = {} + >>> zarr.open(data, mode="w") # implicitly creates a MemoryStore + + +Explicit Store Creation +----------------------- + +In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four +built-in stores: :class:`zarr.storage.LocalStore`, :class:`zarr.storage.RemoteStore`, +:class:`zarr.storage.ZipStore`, and :class:`zarr.storage.MemoryStore`. + +Local Store +~~~~~~~~~~~ + +The :class:`zarr.storage.LocalStore` stores data in a nested set of directories on a local +filesystem. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.LocalStore("data/foo/bar", mode="r") + >>> zarr.open(store=store) + + +Zip Store +~~~~~~~~~ + +The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single +Zip file. The `Zip Store Specification`_ is currently in draft form. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.ZipStore("data.zip", mode="w") + >>> zarr.open(store=store, shape=(2,)) + +Remote Store +~~~~~~~~~~~~ + +The :class:`zarr.storage.RemoteStore` accesses data on remote storage systems such as +cloud object storage (e.g. S3, Google Cloud Storage) or HTTP servers, via `Fsspec`_. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r") + >>> zarr.open(store=store) + +Memory Store +~~~~~~~~~~~~ + +The :class:`zarr.storage.MemoryStore` is an in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary. + +.. code-block:: python + + >>> import zarr + >>> data = {} + >>> store = zarr.storage.MemoryStore(data, mode="w") + >>> zarr.open(store=store, shape=(2, )) + + +Developing custom stores +------------------------ + +The Zarr-Python :class:`zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base +Class includes all of the methods needed to be a fully operational store in Zarr Python. +Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. + +.. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 +.. _Fsspec: https://filesystem-spec.readthedocs.io/en/latest/ diff --git a/docs/index.rst b/docs/index.rst index cf54e261af..6b90b5a773 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,11 +10,11 @@ Zarr-Python getting_started tutorial - api + guide/index + api/index spec release license - acknowledgments contributing **Version**: |version| @@ -80,7 +80,7 @@ Zarr is a file storage format for chunked, compressed, N-dimensional arrays base +++ - .. button-ref:: api + ..
button-ref:: api/index :expand: :color: dark :click-parent: diff --git a/docs/installation.rst b/docs/installation.rst index 35865c764d..86da6d1035 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -24,13 +24,4 @@ latest GitHub main:: $ pip install git+https://github.com/zarr-developers/zarr-python.git -To work with Zarr source code in development, install from GitHub:: - - $ git clone --recursive https://github.com/zarr-developers/zarr-python.git - $ cd zarr-python - $ python -m pip install -e . - -To verify that Zarr has been fully installed, run the test suite:: - - $ pip install pytest - $ python -m pytest -v --pyargs zarr +To work with Zarr source code in development, see `Contributing `_. \ No newline at end of file diff --git a/docs/release.rst b/docs/release.rst index a62d6a653c..0b6775c4a6 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,19 +18,290 @@ Release notes See `GH1777 `_ for more details on the upcoming 3.0 release. -.. _release_2.18.4: +.. release_3.0.0-alpha: -2.18.4 (unreleased) -------------------- +3.0.0-alpha +----------- + +.. warning:: + Zarr-Python 3.0.0-alpha is a pre-release of the upcoming 3.0 release. This release is not feature complete or + expected to be ready for production applications. + +.. note:: + The complete release notes for 3.0 have not been added to this document yet. See the + `3.0.0-alpha `_ release on GitHub + for a record of changes included in this release. Enhancements ~~~~~~~~~~~~ +* Implement listing of the sub-arrays and sub-groups for a V3 ``Group``. + By :user:`Davis Bennett ` :issue:`1726`. + +* Bootstrap v3 branch with zarrita. + By :user:`Joe Hamman ` :issue:`1584`. + +* Extensible codecs for V3. + By :user:`Norman Rzepka ` :issue:`1588`. + +* Don't import from tests. + By :user:`Davis Bennett ` :issue:`1601`. + +* Listable V3 Stores. + By :user:`Joe Hamman ` :issue:`1634`. + +* Codecs without array metadata. + By :user:`Norman Rzepka ` :issue:`1632`. + +* fix sync group class methods. + By :user:`Joe Hamman ` :issue:`1652`. + +* implement eq for LocalStore. + By :user:`Charoula Kyriakides ` :issue:`1792`. + +* V3 reorg. + By :user:`Joe Hamman ` :issue:`1809`. + +* [v3] Sync with futures. + By :user:`Davis Bennett ` :issue:`1804`. + +* implement group.members. + By :user:`Davis Bennett ` :issue:`1726`. + +* Remove implicit groups. + By :user:`Joe Hamman ` :issue:`1827`. + +* feature(store): ``list_*`` -> AsyncGenerators. + By :user:`Joe Hamman ` :issue:`1844`. + +* Test codec entrypoints. + By :user:`Norman Rzepka ` :issue:`1835`. + +* Remove extra v3 sync module. + By :user:`Max Jones ` :issue:`1856`. + +* Use donfig for V3 configuration. + By :user:`Max Jones ` :issue:`1655`. + +* groundwork for V3 group tests. + By :user:`Davis Bennett ` :issue:`1743`. + +* [v3] First step to generalizes ndarray and bytes. + By :user:`Mads R. B. Kristensen ` :issue:`1826`. + +* Reworked codec pipelines. + By :user:`Norman Rzepka ` :issue:`1670`. + +* Followup on codecs. + By :user:`Norman Rzepka ` :issue:`1889`. + +* Protocols for Buffer and NDBuffer. + By :user:`Mads R. B. Kristensen ` :issue:`1899`. + +* [V3] Expand store tests. + By :user:`Davis Bennett ` :issue:`1900`. + +* [v3] Feature: Store open mode. + By :user:`Joe Hamman ` :issue:`1911`. + +* fix(types): Group.info -> NotImplementedError. + By :user:`Joe Hamman ` :issue:`1936`. + +* feature(typing): add py.typed file to package root. + By :user:`Joe Hamman ` :issue:`1935`. + +* Support all indexing variants. + By :user:`Norman Rzepka ` :issue:`1917`. 
+ +* Feature: group and array name properties. + By :user:`Joe Hamman ` :issue:`1940`. + +* implement .chunks on v3 arrays. + By :user:`Ryan Abernathey ` :issue:`1929`. + +* Fixes bug in transpose. + By :user:`Norman Rzepka ` :issue:`1949`. + +* Buffer Prototype Argument. + By :user:`Mads R. B. Kristensen ` :issue:`1910`. + +* Feature: Top level V3 API. + By :user:`Joe Hamman ` :issue:`1884`. + +* Basic working FsspecStore. + By :user:`Martin Durant `; :issue:`1785`. + +Typing +~~~~~~ + +* Resolve Mypy errors in v3 branch. + By :user:`Daniel Jahn ` :issue:`1692`. + +* Allow dmypy to be run on v3 branch. + By :user:`David Stansby ` :issue:`1780`. + +* Remove unused typing ignore comments. + By :user:`David Stansby ` :issue:`1781`. + +* Check untyped defs on v3. + By :user:`David Stansby ` :issue:`1784`. + +* [v3] Enable some more strict mypy options. + By :user:`David Stansby ` :issue:`1793`. + +* [v3] Disallow generic Any typing. + By :user:`David Stansby ` :issue:`1794`. + +* Disallow incomplete type definitions. + By :user:`David Stansby ` :issue:`1814`. + +* Disallow untyped calls. + By :user:`David Stansby ` :issue:`1811`. + +* Fix some untyped calls. + By :user:`David Stansby ` :issue:`1865`. + +* Disallow untyped defs. + By :user:`David Stansby ` :issue:`1834`. + +* Add more typing to zarr.group. + By :user:`David Stansby ` :issue:`1870`. + +* Fix any generics in zarr.array. + By :user:`David Stansby ` :issue:`1861`. + +* Remove some unused mypy overrides. + By :user:`David Stansby ` :issue:`1894`. + +* Finish typing zarr.metadata. + By :user:`David Stansby ` :issue:`1880`. + +* Disallow implicit re-exports. + By :user:`David Stansby ` :issue:`1908`. + +* Make typing strict. + By :user:`David Stansby ` :issue:`1879`. + +* Enable extra mypy error codes. + By :user:`David Stansby ` :issue:`1909`. + +* Enable warn_unreachable for mypy. + By :user:`David Stansby ` :issue:`1937`. + +* Fix final typing errors. + By :user:`David Stansby ` :issue:`1939`. + Maintenance ~~~~~~~~~~~ -Deprecations -~~~~~~~~~~~~ +* Remedy a situation where ``zarr-python`` was importing ``DummyStorageTransformer`` from the test suite. + The dependency relationship is now reversed: the test suite imports this class from ``zarr-python``. + By :user:`Davis Bennett ` :issue:`1601`. + +* [V3] Update minimum supported Python and Numpy versions. + By :user:`Joe Hamman ` :issue:`1638` + +* use src layout and use hatch for packaging. + By :user:`Davis Bennett ` :issue:`1592`. + +* temporarily disable mypy in v3 directory. + By :user:`Joe Hamman ` :issue:`1649`. + +* create hatch test env. + By :user:`Ryan Abernathey ` :issue:`1650`. + +* removed unused environments and workflows. + By :user:`Ryan Abernathey ` :issue:`1651`. + +* Add env variables to sprint setup instructions. + By :user:`Max Jones ` :issue:`1654`. + +* Add test matrix for V3. + By :user:`Max Jones ` :issue:`1656`. + +* Remove attrs. + By :user:`Davis Bennett ` :issue:`1660`. + +* Specify hatch envs using GitHub actions matrix for v3 tests. + By :user:`Max Jones ` :issue:`1728`. + +* black -> ruff format + cleanup. + By :user:`Saransh Chopra ` :issue:`1639`. + +* Remove old v3. + By :user:`Davis Bennett ` :issue:`1742`. + +* V3 update pre commit. + By :user:`Joe Hamman ` :issue:`1808`. + +* remove windows testing on v3 branch. + By :user:`Joe Hamman ` :issue:`1817`. + +* fix: add mypy to test dependencies. + By :user:`Davis Bennett ` :issue:`1789`. + +* chore(ci): add numpy 2 release candidate to test matrix. + By :user:`Joe Hamman ` :issue:`1828`. 
+ +* fix dependencies. + By :user:`Norman Rzepka ` :issue:`1840`. + +* Add pytest to mypy dependencies. + By :user:`David Stansby ` :issue:`1846`. + +* chore(pre-commit): update pre-commit versions and remove attrs dep mypy section. + By :user:`Joe Hamman ` :issue:`1848`. + +* Enable some ruff rules (RUF) and fix issues. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1869`. + +* Configure Ruff to apply flake8-bugbear/isort/pyupgrade. + By :user:`Norman Rzepka ` :issue:`1890`. + +* chore(ci): remove mypy from test action in favor of pre-commit action. + By :user:`Joe Hamman ` :issue:`1887`. + +* Enable ruff/flake8-raise rules (RSE) and fix issues. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1872`. + +* Apply assorted ruff/refurb rules (FURB). + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1873`. + +* Enable ruff/flake8-implicit-str-concat rules (ISC) and fix issues. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1868`. + +* Add numpy to mypy pre-commit check env. + By :user:`David Stansby ` :issue:`1893`. + +* remove fixture files from src. + By :user:`Davis Bennett ` :issue:`1897`. + +* Fix list of packages in mypy pre-commit environment. + By :user:`David Stansby ` :issue:`1907`. + +* Run sphinx directly on readthedocs. + By :user:`David Stansby ` :issue:`1919`. + +* Apply preview ruff rules. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1942`. + +* Enable and apply ruff rule RUF009. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1941`. + +Documentation +~~~~~~~~~~~~~ + +* Specify docs hatch env for v3 branch. + By :user:`Max Jones ` :issue:`1655`. + +* Development installation/contributing docs updates. + By :user:`Alden Keefe Sampson ` :issue:`1643`. + +* chore: update project settings per scientific python repo-review. + By :user:`Joe Hamman ` :issue:`1863`. + +* doc: update release notes for 3.0.0.alpha. + By :user:`Joe Hamman ` :issue:`1959`. .. _release_2.18.3: @@ -904,7 +1175,7 @@ Documentation * Update docs to use ``python -m pytest``. By :user:`Ray Bell ` :issue:`923`. -* Fix versionadded tag in zarr.core.Array docstring. +* Fix versionadded tag in zarr.Array docstring. By :user:`Juan Nunez-Iglesias ` :issue:`852`. * Doctest seem to be stricter now, updating tostring() to tobytes(). @@ -1658,7 +1929,7 @@ Enhancements :user:`John Kirkham `, :issue:`92`, :issue:`122`. * **Viewing an array as a different dtype**. The ``Array`` class has a new - :func:`zarr.core.Array.astype` method, which is a convenience that enables an + :func:`zarr.Array.astype` method, which is a convenience that enables an array to be viewed as a different dtype. By :user:`John Kirkham `, :issue:`94`, :issue:`96`. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 214dd4f63f..619392a175 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -18,7 +18,7 @@ Zarr has several functions for creating arrays. For example:: >>> import zarr >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + The code above creates a 2-dimensional array of 32-bit integers with 10000 rows and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 @@ -168,7 +168,7 @@ compression ratio. 
Zarr arrays provide a ``info`` property which can be used to print some diagnostics, e.g.:: >>> z.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -260,7 +260,7 @@ Here is an example using a delta filter with the Blosc compressor:: >>> data = np.arange(100000000, dtype='i4').reshape(10000, 10000) >>> z = zarr.array(data, chunks=(1000, 1000), filters=filters, compressor=compressor) >>> z.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -302,7 +302,7 @@ Groups can also contain arrays, e.g.:: >>> z1 = bar.zeros('baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z1 - + Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the ``create_dataset()`` and ``require_dataset()`` @@ -310,7 +310,7 @@ methods, e.g.:: >>> z = bar.create_dataset('quux', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + Members of a group can be accessed via the suffix notation, e.g.:: @@ -323,7 +323,7 @@ call, e.g.:: >>> root['foo/bar'] >>> root['foo/bar/baz'] - + The :func:`zarr.hierarchy.Group.tree` method can be used to print a tree representation of the hierarchy, e.g.:: @@ -344,7 +344,7 @@ sub-directories, e.g.:: >>> z = root.zeros('foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - + Groups can be used as context managers (in a ``with`` statement). If the underlying store has a ``close`` method, it will be called on exit. @@ -388,7 +388,7 @@ property. E.g.:: >>> bar.info Name : /foo/bar - Type : zarr.core.Array + Type : zarr.Array Data type : int64 Shape : (1000000,) Chunk shape : (100000,) @@ -403,7 +403,7 @@ property. E.g.:: >>> baz.info Name : /foo/baz - Type : zarr.core.Array + Type : zarr.Array Data type : float32 Shape : (1000, 1000) Chunk shape : (100, 100) @@ -472,7 +472,7 @@ Note that although this functionality is similar to some of the advanced indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr API for advanced indexing is different from both NumPy and h5py**, so please read this section carefully. For a complete description of the indexing API, -see the documentation for the :class:`zarr.core.Array` class. +see the documentation for the :class:`zarr.Array` class. Indexing with coordinate arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -880,10 +880,10 @@ Here is an example using S3Map to read an array created previously:: >>> root = zarr.group(store=store) >>> z = root['foo/bar/baz'] >>> z - + >>> z.info Name : /foo/bar/baz - Type : zarr.core.Array + Type : zarr.Array Data type : |S1 Shape : (21,) Chunk shape : (7,) @@ -1176,7 +1176,7 @@ your array, then you can use an array with a fixed-length bytes dtype. E.g.:: >>> z = zarr.zeros(10, dtype='S6') >>> z - + >>> z[0] = b'Hello' >>> z[1] = b'world!' >>> z[:] @@ -1192,7 +1192,7 @@ A fixed-length unicode dtype is also available, e.g.:: >>> text_data = greetings * 10000 >>> z = zarr.array(text_data, dtype='U20') >>> z - + >>> z[:] array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], @@ -1208,7 +1208,7 @@ E.g. 
using ``VLenUTF8``:: >>> import numcodecs >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.VLenUTF8()) >>> z - + >>> z.filters [VLenUTF8()] >>> z[:] @@ -1220,7 +1220,7 @@ is a short-hand for ``dtype=object, object_codec=numcodecs.VLenUTF8()``, e.g.:: >>> z = zarr.array(text_data, dtype=str) >>> z - + >>> z.filters [VLenUTF8()] >>> z[:] @@ -1236,7 +1236,7 @@ e.g.:: >>> bytes_data = [g.encode('utf-8') for g in greetings] * 10000 >>> z = zarr.array(bytes_data, dtype=bytes) >>> z - + >>> z.filters [VLenBytes()] >>> z[:] @@ -1251,7 +1251,7 @@ integer. E.g.:: >>> categorize = numcodecs.Categorize(greetings, dtype=object) >>> z = zarr.array(text_data, dtype=object, object_codec=categorize) >>> z - + >>> z.filters [Categorize(dtype='|O', astype='|u1', labels=['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ...])] >>> z[:] @@ -1301,7 +1301,7 @@ and stores the same primitive type (a.k.a. a ragged array), the >>> z = zarr.empty(4, dtype=object, object_codec=numcodecs.VLenArray(int)) >>> z - + >>> z.filters [VLenArray(dtype='>> z[0] = np.array([1, 3, 5]) @@ -1317,7 +1317,7 @@ primitive dtype such as 'i4' or 'f8'. E.g.:: >>> z = zarr.empty(4, dtype='array:i8') >>> z - + >>> z.filters [VLenArray(dtype='>> z[0] = np.array([1, 3, 5]) @@ -1393,7 +1393,7 @@ ratios, depending on the correlation structure within the data. E.g.:: >>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T >>> c = zarr.array(a, chunks=(1000, 1000)) >>> c.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -1407,7 +1407,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Chunks initialized : 100/100 >>> f = zarr.array(a, chunks=(1000, 1000), order='F') >>> f.info - Type : zarr.core.Array + Type : zarr.Array Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) @@ -1575,7 +1575,7 @@ with thread synchronization:: >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4', ... synchronizer=zarr.ThreadSynchronizer()) >>> z - + This array is safe to read or write within a multi-threaded program. @@ -1589,7 +1589,7 @@ some networked file systems). E.g.:: ... chunks=(1000, 1000), dtype='i4', ... synchronizer=synchronizer) >>> z - + This array is safe to read or write from multiple processes. @@ -1657,7 +1657,7 @@ arrays, as long as the units are specified. 
E.g.:: >>> z = zarr.array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='M8[D]') >>> z - + >>> z[:] array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='datetime64[D]') >>> z[0] diff --git a/environment.yml b/environment.yml deleted file mode 100644 index ff2f9eedef..0000000000 --- a/environment.yml +++ /dev/null @@ -1,14 +0,0 @@ -channels: - - conda-forge - - defaults -dependencies: - - wheel - - numcodecs >= 0.6.4 - - numpy >= 1.21 - - pip - - pip: - - asciitree - - fasteners - - pytest - - pytest-timeout - - setuptools_scm diff --git a/fixture/.zattrs b/fixture/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/.zgroup b/fixture/.zgroup deleted file mode 100644 index 3b7daf227c..0000000000 --- a/fixture/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/0/.zattrs b/fixture/0/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/0/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/0/.zgroup b/fixture/0/.zgroup deleted file mode 100644 index 3b7daf227c..0000000000 --- a/fixture/0/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/0/0/.zarray b/fixture/0/0/.zarray deleted file mode 100644 index a5ceafaf51..0000000000 --- a/fixture/0/0/.zarray +++ /dev/null @@ -1,14 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": null, - "dtype": "|i1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 1111 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/0/0/.zattrs b/fixture/0/0/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/0/0/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/0/0/0 b/fixture/0/0/0 deleted file mode 100644 index e2ff041306..0000000000 Binary files a/fixture/0/0/0 and /dev/null differ diff --git a/fixture/0/0/1 b/fixture/0/0/1 deleted file mode 100644 index 78f25e295b..0000000000 --- a/fixture/0/0/1 +++ /dev/null @@ -1 +0,0 @@ -defghijklmnopqrstuvwxyz{|}~ \ No newline at end of file diff --git a/fixture/0/0/10 b/fixture/0/0/10 deleted file mode 100644 index 8877030829..0000000000 Binary files a/fixture/0/0/10 and /dev/null differ diff --git a/fixture/0/0/11 b/fixture/0/0/11 deleted file mode 100644 index 3cea8e9b57..0000000000 Binary files a/fixture/0/0/11 and /dev/null differ diff --git a/fixture/0/0/2 b/fixture/0/0/2 deleted file mode 100644 index a83c7e1089..0000000000 Binary files a/fixture/0/0/2 and /dev/null differ diff --git a/fixture/0/0/3 b/fixture/0/0/3 deleted file mode 100644 index 8c0210c510..0000000000 --- a/fixture/0/0/3 +++ /dev/null @@ -1 +0,0 @@ -,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file diff --git a/fixture/0/0/4 b/fixture/0/0/4 deleted file mode 100644 index 660224ac06..0000000000 --- a/fixture/0/0/4 +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/fixture/0/0/5 b/fixture/0/0/5 deleted file mode 100644 index 9881b13bab..0000000000 Binary files a/fixture/0/0/5 and /dev/null differ diff --git a/fixture/0/0/6 b/fixture/0/0/6 deleted file mode 100644 index 1e8a425fa5..0000000000 --- a/fixture/0/0/6 +++ /dev/null @@ -1 +0,0 @@ -XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file diff --git a/fixture/0/0/7 b/fixture/0/0/7 deleted file mode 100644 index 
c0723b01b9..0000000000
Binary files a/fixture/0/0/7 and /dev/null differ

[Binary fixture data omitted. The remaining hunks delete the rest of the checked-in test fixture data under ``fixture/`` (see "Remove fixture files from src", :issue:`1897`, above): each numbered fixture group (``fixture/0``, ``fixture/1``, ``fixture/10``, ``fixture/11``, ``fixture/12``, ``fixture/15``, …) contains ``.zgroup``/``.zattrs`` metadata and several small one-dimensional test arrays, each with a ``.zarray`` file (chunks of 100 elements) and numbered binary chunk files written with no compressor, zlib (level 1), bz2 (level 1), or Blosc (zstd/lz4, shuffle 0/1/2). The raw chunk contents do not render as readable text.]
\ No newline at end of file diff --git a/fixture/15/1/25 b/fixture/15/1/25 deleted file mode 100644 index 0c81e7e043..0000000000 --- a/fixture/15/1/25 +++ /dev/null @@ -1,2 +0,0 @@ -x -P ߯CbA(F\]B(kٍ.cugߪWC?>K6T4?ۿ||[};Oy:Nwtw~O~yk|KKߧ \ No newline at end of file diff --git a/fixture/15/1/26 b/fixture/15/1/26 deleted file mode 100644 index dba3814a87..0000000000 --- a/fixture/15/1/26 +++ /dev/null @@ -1,3 +0,0 @@ -x -1 ᾪ{* -s 9M23c[իj~ϪY?|ٜVOWyyd篲y«o}9߱S|_sk|՗'>]_=ټrsx7o?|+}|y;=7Q \ No newline at end of file diff --git a/fixture/15/1/27 b/fixture/15/1/27 deleted file mode 100644 index 5b7a815277..0000000000 --- a/fixture/15/1/27 +++ /dev/null @@ -1,2 +0,0 @@ -xA -@D\UnEAO OM(w%eYrs[>111'}}/7?o?^ߣ~~߼Z:ޏ__~U{<7ׯo>m__yۧ幗~z|^^}v߼:Vy? \ No newline at end of file diff --git a/fixture/15/1/28 b/fixture/15/1/28 deleted file mode 100644 index 92c14290e9..0000000000 --- a/fixture/15/1/28 +++ /dev/null @@ -1,2 +0,0 @@ -x -@CWЫ(;Jʃ, KLmugu?ooO>om?;>3ny'go \ No newline at end of file diff --git a/fixture/15/1/29 b/fixture/15/1/29 deleted file mode 100644 index dacc0d449f..0000000000 --- a/fixture/15/1/29 +++ /dev/null @@ -1,2 +0,0 @@ -x -P ߯CłPd`E C6cmOGջj>Ϫ^'ԳV?[/_~0{_Ol=l]yf%w={3>3~;o?ϛ<͞~>#qCO?}7ɳo}c_ȇS~K \ No newline at end of file diff --git a/fixture/15/1/3 b/fixture/15/1/3 deleted file mode 100644 index c86f01f657..0000000000 Binary files a/fixture/15/1/3 and /dev/null differ diff --git a/fixture/15/1/30 b/fixture/15/1/30 deleted file mode 100644 index b070f41e8c..0000000000 --- a/fixture/15/1/30 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ DW_Kˁ0sm^YOPO[ifϔq}u}oLo1׸g?{G%ݏ|捭}Ic>at~ħߘ|^ѧ~|s_: \ No newline at end of file diff --git a/fixture/15/1/31 b/fixture/15/1/31 deleted file mode 100644 index 18c9fd3e32..0000000000 --- a/fixture/15/1/31 +++ /dev/null @@ -1,2 +0,0 @@ -x -@CWЫT(|Pٺ0,!L"cfZ_{菋/c,Sϼ$޻z9yN]yӯ=a>ç}ǻ1g{덭7Nپ߿wGoL=Лw_g{GO߮wywy~O^)?iy~?yY'=ylz>ֳ>YT \ No newline at end of file diff --git a/fixture/15/1/33 b/fixture/15/1/33 deleted file mode 100644 index b1d98acbc8..0000000000 --- a/fixture/15/1/33 +++ /dev/null @@ -1,2 +0,0 @@ -xA -@ ͡ג@uh2`I"$˲1OKգ[ozunͻ8qϾ<~v>Yyk7~ zgy<'xv>c1>w.~c7&{ycؓ~x>?oϞo=<}U}IC \ No newline at end of file diff --git a/fixture/15/1/35 b/fixture/15/1/35 deleted file mode 100644 index 393382fadc..0000000000 --- a/fixture/15/1/35 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ W탯҂ ~92+61ƣ[3f1U>sto~Vy=tǿobͩO?X~][7ӗuO^OovUSu9}N~1'u3X=t|t`?pej \ No newline at end of file diff --git a/fixture/15/1/36 b/fixture/15/1/36 deleted file mode 100644 index 1dea3a54bd..0000000000 --- a/fixture/15/1/36 +++ /dev/null @@ -1,2 +0,0 @@ -x -P ߯bA(s\RBc6[Kէj~[~7=Qϣг_O}cO}[٧eN~yz3ܓΟy/}·<}ל~wױwz U \ No newline at end of file diff --git a/fixture/15/1/37 b/fixture/15/1/37 deleted file mode 100644 index 4a4f90a683..0000000000 --- a/fixture/15/1/37 +++ /dev/null @@ -1,3 +0,0 @@ -xA -@ a@eK&ɒs~c - o5z'=/<뷚~߼O`Mw^w';`ޯo3N8}?}ۯoL>~·Gx3N|Ӈ7 @~ \ No newline at end of file diff --git a/fixture/15/1/38 b/fixture/15/1/38 deleted file mode 100644 index 5e55c34981..0000000000 --- a/fixture/15/1/38 +++ /dev/null @@ -1,2 +0,0 @@ -x -P ߯C HT:.0dټiqzVo:fݷZGNz};c<~]fϛw5o#|f}>~f{__wy`ֻoN.)Γ>"fo>1y| \ No newline at end of file diff --git a/fixture/15/1/39 b/fixture/15/1/39 deleted file mode 100644 index b80e96596d..0000000000 --- a/fixture/15/1/39 +++ /dev/null @@ -1,2 +0,0 @@ -x -0 CۡAa精cMW0A,:؎zyU=±8jVʷs5gov>o{_qG`u|s>cn}w{O|Ky>a9i>Y~_}Ƀ?*k \ No newline at end of file diff --git a/fixture/15/1/4 b/fixture/15/1/4 deleted file mode 100644 index 9f2b544818..0000000000 Binary files a/fixture/15/1/4 and /dev/null differ diff 
--git a/fixture/15/1/40 b/fixture/15/1/40 deleted file mode 100644 index 65e84b7941..0000000000 --- a/fixture/15/1/40 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ DWEP͂@U8-u5[kx+9{?WxOyo|{=C?c?}wNu>0``{o?IO?&4p~.o=}]?[^O7o~+3'~w_c%>Yo87o<ߟ~ѓN|޼/ \ No newline at end of file diff --git a/fixture/15/1/42 b/fixture/15/1/42 deleted file mode 100644 index 1ffee1d827..0000000000 Binary files a/fixture/15/1/42 and /dev/null differ diff --git a/fixture/15/1/43 b/fixture/15/1/43 deleted file mode 100644 index 66a592c592..0000000000 --- a/fixture/15/1/43 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ W냯EA9p@@B2\5j{]j~foZ=O?<3~1s/ٻoғ也;_{O^~}仟o|1[Oy/]~g=y<߇yS_aΏ}ͩ~f8 \ No newline at end of file diff --git a/fixture/15/1/44 b/fixture/15/1/44 deleted file mode 100644 index 39caa64600..0000000000 --- a/fixture/15/1/44 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ W탯 _o:PB(f7k[իjG?}og^4ywqV47sOoL4뷞xz~Wa켎s=/ov^ӟu=t`w_[O|7~OG&wUg~pt_c&?|tKy=~j}y].~'i޺y=ofNN/Ӽu3y]//n \ No newline at end of file diff --git a/fixture/15/1/48 b/fixture/15/1/48 deleted file mode 100644 index b57b31cdfa..0000000000 --- a/fixture/15/1/48 +++ /dev/null @@ -1,2 +0,0 @@ -x -0 烯rpXb A&ۮcS_Pgz]u7[o>~3~w^ݷl=y|o>Þ~9!ggI$=y;|sOix?}}~gͷb~Ï \ No newline at end of file diff --git a/fixture/15/1/49 b/fixture/15/1/49 deleted file mode 100644 index 283c33c31f..0000000000 --- a/fixture/15/1/49 +++ /dev/null @@ -1,2 +0,0 @@ -x -@ kQį7e dW*,˘]#bϑ*^?+ʳ}uϟ<*Ư;a,;ԫ}g\?7[?OGf֭yp/uez<.~֛wl?](W|Ǐ+7_K \ No newline at end of file diff --git a/fixture/15/1/5 b/fixture/15/1/5 deleted file mode 100644 index 0b390ec3a2..0000000000 --- a/fixture/15/1/5 +++ /dev/null @@ -1,2 +0,0 @@ -x -@CWP< °$]1:5ck;·WKxx񻊝><]_x'0:zc}=<}}7f~cIO~~7/Gq|֏}Gold` \ No newline at end of file diff --git a/fixture/15/1/50 b/fixture/15/1/50 deleted file mode 100644 index 96ba17b620..0000000000 --- a/fixture/15/1/50 +++ /dev/null @@ -1,2 +0,0 @@ -x -@CW(; }WII2cj^5yּz8~.M$_O<|cϼg}IwUw'ol`? 
?w_>+U>ѫ̳/Y~w}t=~ͳw7'yG}wʗ<)\_߽?oɯ =g}.0zyUafyons_'~C^ϛu>뙻|3~׷>ٿ?Wysʟ|c}_e{uy胈 \ No newline at end of file diff --git a/fixture/15/1/53 b/fixture/15/1/53 deleted file mode 100644 index bf0e8df5d1..0000000000 Binary files a/fixture/15/1/53 and /dev/null differ diff --git a/fixture/15/1/54 b/fixture/15/1/54 deleted file mode 100644 index d8d820fd2f..0000000000 Binary files a/fixture/15/1/54 and /dev/null differ diff --git a/fixture/15/1/55 b/fixture/15/1/55 deleted file mode 100644 index f2d7c6fc55..0000000000 --- a/fixture/15/1/55 +++ /dev/null @@ -1,2 +0,0 @@ -xA -0 $RA_o.@Y&TadKDl5sR#kg8tW?Y_abO[z1ds멋5scӽGut UKܹ_8Uv \ No newline at end of file diff --git a/fixture/15/1/6 b/fixture/15/1/6 deleted file mode 100644 index 1395b624a7..0000000000 --- a/fixture/15/1/6 +++ /dev/null @@ -1,2 +0,0 @@ -x -0 C!ױA>({-H1wrzqb񌳳_j?y[ol=8{?K?ݾwӧ/gy޿g2?}>]߼~k}Yo\3O_L^G~g; \ No newline at end of file diff --git a/fixture/15/1/7 b/fixture/15/1/7 deleted file mode 100644 index d62802524b..0000000000 Binary files a/fixture/15/1/7 and /dev/null differ diff --git a/fixture/15/1/8 b/fixture/15/1/8 deleted file mode 100644 index 2a8fc693ab..0000000000 --- a/fixture/15/1/8 +++ /dev/null @@ -1,2 +0,0 @@ -xA -@ az#X MT!HN3ST߷wJtc>'s=_3O~<<֛g=us[&pҧއ[}|_ӏ~s]/Gz)/y7_z&/џX߽~'~}~ڇ|;S \ No newline at end of file diff --git a/fixture/15/2/.zarray b/fixture/15/2/.zarray deleted file mode 100644 index eec9c32ce1..0000000000 --- a/fixture/15/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "^==Ϋ:z}ɡ9W]>듯<7Oe_o =7"+`uwn}#G7O!~b \ No newline at end of file diff --git a/fixture/16/1/1 b/fixture/16/1/1 deleted file mode 100644 index aa9e976823..0000000000 Binary files a/fixture/16/1/1 and /dev/null differ diff --git a/fixture/16/1/10 b/fixture/16/1/10 deleted file mode 100644 index 0fe5bdce9e..0000000000 --- a/fixture/16/1/10 +++ /dev/null @@ -1 +0,0 @@ -x 0DQ1}L)$ԓS.T^|=QE.e6L6.]Svmc7-E>S'R>5Mu^o8ʟ}9e>k{'<5q4y=orΏ9Nȟ7 \ No newline at end of file diff --git a/fixture/16/1/11 b/fixture/16/1/11 deleted file mode 100644 index d5662c3206..0000000000 --- a/fixture/16/1/11 +++ /dev/null @@ -1,4 +0,0 @@ -x} FQ -qi-` -zzf -y1t_%Ro2ukԛ;uWdy~/uy.%{%s>9_)̏z$t/hz:!;׽&US \ No newline at end of file diff --git a/fixture/16/1/12 b/fixture/16/1/12 deleted file mode 100644 index b9f7cd8a97..0000000000 --- a/fixture/16/1/12 +++ /dev/null @@ -1 +0,0 @@ -xuҽ 0 BA;0 KLO;Ddt?D޶3uh<_8<ϗ eO3}\]Gt}&>yz͏Χ*_aeOr}tfoί\9}0}9=kS \ No newline at end of file diff --git a/fixture/16/1/13 b/fixture/16/1/13 deleted file mode 100644 index e632b0bed1..0000000000 --- a/fixture/16/1/13 +++ /dev/null @@ -1 +0,0 @@ -xu0 @“>BzxzX<$ܜtwukyΗqWSrq z+B9W}Ν?_Gd[C}aGT.Ǿ8>~rswV%y u齽Ԯ \ No newline at end of file diff --git a/fixture/16/1/14 b/fixture/16/1/14 deleted file mode 100644 index 5818cddc89..0000000000 --- a/fixture/16/1/14 +++ /dev/null @@ -1 +0,0 @@ -xu 0D-cRԓS.T^| 7Y i4?ӺL64}ёC]ܫ'sxUN\悔tDo ڠUG>Qyz8y< \ No newline at end of file diff --git a/fixture/16/1/15 b/fixture/16/1/15 deleted file mode 100644 index 7a453d6a7f..0000000000 --- a/fixture/16/1/15 +++ /dev/null @@ -1,2 +0,0 @@ -x 0+'}M) -C'!]Y.vzk=n˃x ٧n<y^-Y'1Z̃:ck|H澾w'G>tׇZ=3|$e^=ۿ#_Ӭݖ \ No newline at end of file diff --git a/fixture/16/1/16 b/fixture/16/1/16 deleted file mode 100644 index 1ea850689c..0000000000 --- a/fixture/16/1/16 +++ /dev/null @@ -1,2 +0,0 @@ -xu 0+'}MBzʇ -O"`Ivowu1{{lV9y_^<џ2熽zpNo^{p|j^|p}rt|YГt(C=rۛ9f}#7B2 \ No newline at end of file diff --git a/fixture/16/1/17 b/fixture/16/1/17 deleted file mode 100644 index 
03851a6fc8..0000000000 Binary files a/fixture/16/1/17 and /dev/null differ diff --git a/fixture/16/1/18 b/fixture/16/1/18 deleted file mode 100644 index b39626a2f1..0000000000 --- a/fixture/16/1/18 +++ /dev/null @@ -1 +0,0 @@ -x 0+$;T'|RAz5|e:lPώ߾zSnpS λw|3t݃@h|E~п \ No newline at end of file diff --git a/fixture/16/1/23 b/fixture/16/1/23 deleted file mode 100644 index aeaf1a1efc..0000000000 --- a/fixture/16/1/23 +++ /dev/null @@ -1 +0,0 @@ -x}DP dp@D 8E#2e)3=F馦0Ϋ/ׯ?˰sO#٧s._~EΡ#rhӧQ9x#ʡ篹94qJDELJw^9Z>A(gZ \ No newline at end of file diff --git a/fixture/16/1/24 b/fixture/16/1/24 deleted file mode 100644 index c363425923..0000000000 --- a/fixture/16/1/24 +++ /dev/null @@ -1,2 +0,0 @@ -x} @ @“>xS@ -z3drtʻ6'y0~'NJxᵜ97ΑAynsۛ}P?\uѡ.5_?Û}ڟ}__k9x>L1 \ No newline at end of file diff --git a/fixture/16/1/25 b/fixture/16/1/25 deleted file mode 100644 index af7024af3f..0000000000 --- a/fixture/16/1/25 +++ /dev/null @@ -1,3 +0,0 @@ -x 0-s -H!@=rqEǘg D;i[mYH7ykE?t=}üGOڡ@◟͡d~r ^(z\.=+Nsw1 /# -2ow` \ No newline at end of file diff --git a/fixture/16/1/26 b/fixture/16/1/26 deleted file mode 100644 index 6457124755..0000000000 Binary files a/fixture/16/1/26 and /dev/null differ diff --git a/fixture/16/1/27 b/fixture/16/1/27 deleted file mode 100644 index 4324703a1b..0000000000 Binary files a/fixture/16/1/27 and /dev/null differ diff --git a/fixture/16/1/28 b/fixture/16/1/28 deleted file mode 100644 index 4f61f16a26..0000000000 Binary files a/fixture/16/1/28 and /dev/null differ diff --git a/fixture/16/1/29 b/fixture/16/1/29 deleted file mode 100644 index 3b378b05ed..0000000000 --- a/fixture/16/1/29 +++ /dev/null @@ -1 +0,0 @@ -xu 0-#}LBz8qz(i:Xx/zn_;:Qԫ?G;nz#둇z#pš^|ݯ=rݧ+{&U_+>\s~84v6 \ No newline at end of file diff --git a/fixture/16/1/3 b/fixture/16/1/3 deleted file mode 100644 index ee4392ca9c..0000000000 --- a/fixture/16/1/3 +++ /dev/null @@ -1 +0,0 @@ -xϱ PPI A2߁yd >BXپcǩYy-ԟ7Z\rPpx-w_Cy/}7s?~|k'K?n.O^і \ No newline at end of file diff --git a/fixture/16/1/30 b/fixture/16/1/30 deleted file mode 100644 index 6badc05fd4..0000000000 --- a/fixture/16/1/30 +++ /dev/null @@ -1 +0,0 @@ -xu 0-#}LBz8qz(S@,KT}76|Y]n\mw43~s{~wn\.?}8]_"]/.Ϗ7~ByOzroa6Oǧ;!Oyg?6޼>x93=7t" \ No newline at end of file diff --git a/fixture/16/1/32 b/fixture/16/1/32 deleted file mode 100644 index 4678297ce9..0000000000 Binary files a/fixture/16/1/32 and /dev/null differ diff --git a/fixture/16/1/33 b/fixture/16/1/33 deleted file mode 100644 index 7a6c03db41..0000000000 --- a/fixture/16/1/33 +++ /dev/null @@ -1 +0,0 @@ -xұ @D-}S zp=H~|:i4;ne|:y[a_=[ǡx/Oy|s49w5y'w7We?>Kc \ No newline at end of file diff --git a/fixture/16/1/34 b/fixture/16/1/34 deleted file mode 100644 index e25a1b5526..0000000000 --- a/fixture/16/1/34 +++ /dev/null @@ -1,2 +0,0 @@ -xѽ P `BA ;0 KJBOBDO:}?Ry/<<.M}e -ï.߼1z!qs<{9'C:=<鳇˞]?tHO>skn/8l \ No newline at end of file diff --git a/fixture/16/1/35 b/fixture/16/1/35 deleted file mode 100644 index 407d63ba1b..0000000000 Binary files a/fixture/16/1/35 and /dev/null differ diff --git a/fixture/16/1/36 b/fixture/16/1/36 deleted file mode 100644 index 5d993a5be6..0000000000 --- a/fixture/16/1/36 +++ /dev/null @@ -1 +0,0 @@ -x 0DQ‘>rBzrBXGQ٬S8׽ez\o>}o=VqX9֭7S<|5tgezg|OWrT~jP9z>U|nM2 \ No newline at end of file diff --git a/fixture/16/1/37 b/fixture/16/1/37 deleted file mode 100644 index b053686bf5..0000000000 --- a/fixture/16/1/37 +++ /dev/null @@ -1 +0,0 @@ -x}б 0$p2H&ǕL]TppG@OeӲN><0qj[E5뷽7s_G|C8CrG5{>;>:\λU._{Qo? 
\ No newline at end of file diff --git a/fixture/16/1/38 b/fixture/16/1/38 deleted file mode 100644 index b7e11c34a3..0000000000 Binary files a/fixture/16/1/38 and /dev/null differ diff --git a/fixture/16/1/39 b/fixture/16/1/39 deleted file mode 100644 index 4af7763708..0000000000 --- a/fixture/16/1/39 +++ /dev/null @@ -1 +0,0 @@ -xu 1 “>MBzxzɓEe (׻\ky^|zOOA:Xu rF>=Odx=r=r^k7:}:?}H}:T/xO~:'t9Tٹ \ No newline at end of file diff --git a/fixture/16/1/4 b/fixture/16/1/4 deleted file mode 100644 index 5b4dd4281a..0000000000 --- a/fixture/16/1/4 +++ /dev/null @@ -1 +0,0 @@ -xu 0PcRH*pԓS.T@/>ƀ#aY^gg-C)ju/x,|ZzZx~XnVvS<%fg.>|/uG|%ĽU~|Uz,/8~<,<: \ No newline at end of file diff --git a/fixture/16/1/40 b/fixture/16/1/40 deleted file mode 100644 index 27acb8204e..0000000000 --- a/fixture/16/1/40 +++ /dev/null @@ -1 +0,0 @@ -xeһ@ aB>)P\h`nYꂋ8޾i?xڇxg?X׼ohKT9:_|΋s}u#qOOU\o#G=OQx7y~8y{ \ No newline at end of file diff --git a/fixture/16/1/41 b/fixture/16/1/41 deleted file mode 100644 index 12617e423b..0000000000 Binary files a/fixture/16/1/41 and /dev/null differ diff --git a/fixture/16/1/42 b/fixture/16/1/42 deleted file mode 100644 index cf30a62450..0000000000 --- a/fixture/16/1/42 +++ /dev/null @@ -1 +0,0 @@ -x 0@gRH*pԓW> 4X>!ݽ;ԎY6Z/V/G=w,y)08]P.A>/L}v}SoĒߏcz~^u!whw \ No newline at end of file diff --git a/fixture/16/1/43 b/fixture/16/1/43 deleted file mode 100644 index 25dbc980f3..0000000000 Binary files a/fixture/16/1/43 and /dev/null differ diff --git a/fixture/16/1/44 b/fixture/16/1/44 deleted file mode 100644 index 918b16ea53..0000000000 --- a/fixture/16/1/44 +++ /dev/null @@ -1,2 +0,0 @@ -xα @ DQBHpP@=]Bˆ i4_8oiCakS}{Ww28n3ok뙷k>9r=zϹO\q9GyD>{h -Z@ \ No newline at end of file diff --git a/fixture/16/1/45 b/fixture/16/1/45 deleted file mode 100644 index e92de042e6..0000000000 --- a/fixture/16/1/45 +++ /dev/null @@ -1 +0,0 @@ -xuл 0`BA;0O4L..|D>ɑO)yo]߶tռgV=7]fO[Ïx3紗HWƽR|n=r3_0{$o7o~bg}Og \ No newline at end of file diff --git a/fixture/16/1/46 b/fixture/16/1/46 deleted file mode 100644 index a6aacbe767..0000000000 Binary files a/fixture/16/1/46 and /dev/null differ diff --git a/fixture/16/1/47 b/fixture/16/1/47 deleted file mode 100644 index e9d78090a7..0000000000 --- a/fixture/16/1/47 +++ /dev/null @@ -1,2 +0,0 @@ -x] P -}RH*דS. 
p~B Bg܎s;>{u]r|}[SlUN"?į9s_\ujBJ9ï>>>nO\!gΥ9sQ'>]ssR'ʃr \ No newline at end of file diff --git a/fixture/16/1/48 b/fixture/16/1/48 deleted file mode 100644 index 89e635c152..0000000000 --- a/fixture/16/1/48 +++ /dev/null @@ -1 +0,0 @@ -xα @ @ 2{|2''UO]\ư}xHyIQu՞xO-h_9sqyr*[uy.Ϟ~Hכzo9|9'ΧQzWkz_'>wG_\ \ No newline at end of file diff --git a/fixture/16/1/49 b/fixture/16/1/49 deleted file mode 100644 index 15cbd1bb4d..0000000000 Binary files a/fixture/16/1/49 and /dev/null differ diff --git a/fixture/16/1/5 b/fixture/16/1/5 deleted file mode 100644 index 4d42627087..0000000000 Binary files a/fixture/16/1/5 and /dev/null differ diff --git a/fixture/16/1/50 b/fixture/16/1/50 deleted file mode 100644 index c3e5fdf72d..0000000000 Binary files a/fixture/16/1/50 and /dev/null differ diff --git a/fixture/16/1/51 b/fixture/16/1/51 deleted file mode 100644 index 6cbc95f96a..0000000000 --- a/fixture/16/1/51 +++ /dev/null @@ -1 +0,0 @@ -x 0-#}LB!pB#&(nfOg˘m6}G<{3#ǯ̏W:CzssO(>~g<7߃੏P[/}: \ No newline at end of file diff --git a/fixture/16/1/52 b/fixture/16/1/52 deleted file mode 100644 index 20e869ca1c..0000000000 --- a/fixture/16/1/52 +++ /dev/null @@ -1,2 +0,0 @@ -x} 0@3}N) -C+*H/~ŒXnvaٷe^kXjq:ڭ۽_Uu΍yι|)7y<x:5+:y5~g[c=GOkZ~}r/KN9rBz8qz#"&|~03G> -C}_d^>&uͳGD3ܪE-̃g; {>^j*\}O|/%都_s$*v?L_t}_< \ No newline at end of file diff --git a/fixture/16/1/55 b/fixture/16/1/55 deleted file mode 100644 index be5892bd8b..0000000000 --- a/fixture/16/1/55 +++ /dev/null @@ -1,2 +0,0 @@ -xб @D->-B -PQr f|fgZlXǝ<#+x()zɗ}P^r=y?/Sѯ9 \ No newline at end of file diff --git a/fixture/16/1/8 b/fixture/16/1/8 deleted file mode 100644 index b6b547a685..0000000000 Binary files a/fixture/16/1/8 and /dev/null differ diff --git a/fixture/16/1/9 b/fixture/16/1/9 deleted file mode 100644 index 4cc02933ea..0000000000 --- a/fixture/16/1/9 +++ /dev/null @@ -1,3 +0,0 @@ -xu @ P‘>L) -ą -e$S*"ƞeٞWΗ}4{FǫtoϾJFur3Jo>>N:{"~ڧ3N;p/11ܶ \ No newline at end of file diff --git a/fixture/16/2/.zarray b/fixture/16/2/.zarray deleted file mode 100644 index c4bed6fbc7..0000000000 --- a/fixture/16/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "fl`#DEsIgLu2L((eʩ<߭ tiI/eyCF -!!!4"TX^!Lb2죄#|I)'8)bhGxx:Ӆd3d*'j &d0^"ьa,l|N18jG4mxXLd%,Tr&MOՋ$ a(F -Ƨ=쥡lD8x(Z 0ILf:bsyy$82sΆ ]oI??+Yj76OFp}ҐO3F3LLdo0C'8˷}@'~Eg,=Xu|znrԾ7`Q a(x^c^Oq9Nцvt):1"e1\{pqF&y31d[mf?8HphF+5myvL 3?3,dgs\\׹Aу~ dYGg g+;.9ӀH)1y LLc18g7gE.qƯNoЗ| -Wֱl!ȁs' A"HF')Le?8H_qrNsiG;x$ XB,'5׹A.a dIeF82NvQ짔sv"GhKJx@;"10qd2d&f>9s3T*$FiG7x$3YZ -M"BkG4&(M:`2 fE)G)PN$>Z;$ғ^$2X͇䳁PCvPP҈pRNcyt&2%$ю8:8t ]f>9,=rY*VSUOJ aD*X -N{bq#DҎh1:0d1f1KXJ -?PMnqjGyd0al`#lS -3A \ No newline at end of file diff --git a/fixture/20/1/0.0.2 b/fixture/20/1/0.0.2 deleted file mode 100644 index 1b7df776b6..0000000000 Binary files a/fixture/20/1/0.0.2 and /dev/null differ diff --git a/fixture/20/1/0.0.3 b/fixture/20/1/0.0.3 deleted file mode 100644 index a371944eea..0000000000 Binary files a/fixture/20/1/0.0.3 and /dev/null differ diff --git a/fixture/20/1/0.1.0 b/fixture/20/1/0.1.0 deleted file mode 100644 index bc9a97df71..0000000000 --- a/fixture/20/1/0.1.0 +++ /dev/null @@ -1,9 +0,0 @@ -x c_k̈YY9i9֒C89NCph'Iph'IơY8,94Nxϋ7:<@y$,f[, ,<H-q:nqw҇Tg i c8f}MiF&gB6o0K'9EpPK'A2ЛjְB>d#El@F‰Qa,5& 9RWTP9HI $R{ R-n#PFN:/3 &I {G)s2NPN=O'%n5\@A &2IcIO) (%^"vёGD3fo"JQE5RunpڑL# a(lm|DBӎ0"hI$1% &t^'9ee|M%L5jG"x$ҏd -VH;iH#BiN "hҙd0iL (8_RipщnWbY -J> nhE fCx^aOr/8NLvҁґ.a.XȟXreL5/׹ÏhG?30Hmb?qA$Leәf!9Y..qFϦы> Yfv ;iL;ҌiN4І2Y_pSӜrvt'y>,]Vlb3w@sý4 4c/1qLb2Sg([hG:3Н'f!3YrmB"l( 
b8ec8v ;?apцx= f%e᭴Q&8"ILf -ټLrO.U|yjB-ݣ,X* -zІL -He(/(FN)fRAQJdvDK;#t"d37aYB%稢o:7IR[H?ϑ`06P6>]fF-$ĒD2d1̣s[;HOD_џ"|@[6#4" Q& JStrR%e g? f \ No newline at end of file diff --git a/fixture/20/1/0.1.1 b/fixture/20/1/0.1.1 deleted file mode 100644 index dffa896ae2..0000000000 Binary files a/fixture/20/1/0.1.1 and /dev/null differ diff --git a/fixture/20/1/0.1.2 b/fixture/20/1/0.1.2 deleted file mode 100644 index a21b9f5179..0000000000 Binary files a/fixture/20/1/0.1.2 and /dev/null differ diff --git a/fixture/20/1/0.1.3 b/fixture/20/1/0.1.3 deleted file mode 100644 index 29c51b7bdf..0000000000 Binary files a/fixture/20/1/0.1.3 and /dev/null differ diff --git a/fixture/20/1/0.2.0 b/fixture/20/1/0.2.0 deleted file mode 100644 index fc0b00370c..0000000000 Binary files a/fixture/20/1/0.2.0 and /dev/null differ diff --git a/fixture/20/1/0.2.1 b/fixture/20/1/0.2.1 deleted file mode 100644 index 94b8d35554..0000000000 Binary files a/fixture/20/1/0.2.1 and /dev/null differ diff --git a/fixture/20/1/0.2.2 b/fixture/20/1/0.2.2 deleted file mode 100644 index bb22ea0ba8..0000000000 --- a/fixture/20/1/0.2.2 +++ /dev/null @@ -1,6 +0,0 @@ -x q|u_n133K%紖'Iph:4Nf,%ɱp,%ɱp,?v Iʳ" `%XM1ۂ Mf c5&4sC9Ag9G:AI 3,XI)&~a%HP1L^a4YL$a/|A*8N%Bړt"IfsG>ﱈVPepZ~&F΋ %QdRVQg즌s?H!amB,{.S jIC;R1!Sz']QDӒhK>g?jG4m8: :2)Le&by4g5~:="'5} b kYF&);hH;"h45҆qgo&3rS;.%R;ҝ_у^XMnܥћ eldg;^6͈Zӎh$^ Gf0ps\ -Wvvt'gEJVb>b%le͵>Q4)1"ьa,ټ$rtrT5',B;:t#gHeYB!"SMnDN$b(N&0,&nr v-=Z?M:vh26EюㄱuجYXhWB+Eq whwv[36=E`!F/]Evc{F{fL̂vhq|}wc[d BGxbl}D"ڝic_I"kݥ}c+ hwvG{|46KDa9If3\^>76/i̙,2e\56?i$h@{FcQ좈!N)L iF&8F9|E%Eb[kG t#/ uO>6B]&OCRIyt0ise4D=Ѵ=t+ ,d9,]rYZqp A.L -#xT2dE39)hJ f{( hY;ӌhN i `&#Yro8enp[|Z;zџ_3! elolg7{K.Qa4i4% d0dC(<Hl/#n$қ>%䱎gBvP ӐTFx^!)Le|A8MQ#v'.t%,"K.kX:q#A]eIa/X1좈|F19Q - \ No newline at end of file diff --git a/fixture/20/1/1.0.1 b/fixture/20/1/1.0.1 deleted file mode 100644 index 20317ff6b5..0000000000 Binary files a/fixture/20/1/1.0.1 and /dev/null differ diff --git a/fixture/20/1/1.0.2 b/fixture/20/1/1.0.2 deleted file mode 100644 index 5fc020f357..0000000000 Binary files a/fixture/20/1/1.0.2 and /dev/null differ diff --git a/fixture/20/1/1.0.3 b/fixture/20/1/1.0.3 deleted file mode 100644 index 22ca3074c3..0000000000 Binary files a/fixture/20/1/1.0.3 and /dev/null differ diff --git a/fixture/20/1/1.1.0 b/fixture/20/1/1.1.0 deleted file mode 100644 index 676f4d60ce..0000000000 Binary files a/fixture/20/1/1.1.0 and /dev/null differ diff --git a/fixture/20/1/1.1.1 b/fixture/20/1/1.1.1 deleted file mode 100644 index c06c4c4413..0000000000 Binary files a/fixture/20/1/1.1.1 and /dev/null differ diff --git a/fixture/20/1/1.1.2 b/fixture/20/1/1.1.2 deleted file mode 100644 index 9a602bf859..0000000000 Binary files a/fixture/20/1/1.1.2 and /dev/null differ diff --git a/fixture/20/1/1.1.3 b/fixture/20/1/1.1.3 deleted file mode 100644 index efe26bf741..0000000000 Binary files a/fixture/20/1/1.1.3 and /dev/null differ diff --git a/fixture/20/1/1.2.0 b/fixture/20/1/1.2.0 deleted file mode 100644 index 8c8e48b27e..0000000000 Binary files a/fixture/20/1/1.2.0 and /dev/null differ diff --git a/fixture/20/1/1.2.1 b/fixture/20/1/1.2.1 deleted file mode 100644 index 7a8b0b8c02..0000000000 --- a/fixture/20/1/1.2.1 +++ /dev/null @@ -1,5 +0,0 @@ -x |onIkIk9̒fFbf84KcY84KcY84¡Yrh '!'y}x^M7}Hy0lI)OA0qMsbd$y9C%p*s LRI/@:X6!(f%jQplc;!J] +L` -d*3yY NppK|eڵӎtYҋa9ﲂ5lb3[vԤ6x%F2?0ILf -9Cђ<`fP줔}^v'iD4͉aB.o0<1S |% :iGRI/@:X6!(f%JҎڄ1"XƑkL"r8.RI|ghK"H",YE].ʽ" 'a 'Cɦ}짌9B9' Y!ē@GcPJ -:7 %4P^$Q&bvP^>CnI͈&Vđ r[䱈sTr6w&%Y;Rs1! 
ld;QnNC%&4%,&t^'a.圤|pk$hG"t'?|@[vBzhG(uy0 h2d1iL)_RYqT툣=H+HfgQZֱܡ=?Pˏόr \ No newline at end of file diff --git a/fixture/20/1/1.2.2 b/fixture/20/1/1.2.2 deleted file mode 100644 index 39b2e3211a..0000000000 Binary files a/fixture/20/1/1.2.2 and /dev/null differ diff --git a/fixture/20/1/1.2.3 b/fixture/20/1/1.2.3 deleted file mode 100644 index a97d1c2770..0000000000 Binary files a/fixture/20/1/1.2.3 and /dev/null differ diff --git a/fixture/20/1/1.3.0 b/fixture/20/1/1.3.0 deleted file mode 100644 index cf472c33aa..0000000000 Binary files a/fixture/20/1/1.3.0 and /dev/null differ diff --git a/fixture/20/1/1.3.1 b/fixture/20/1/1.3.1 deleted file mode 100644 index 05047d37a3..0000000000 Binary files a/fixture/20/1/1.3.1 and /dev/null differ diff --git a/fixture/20/1/1.3.2 b/fixture/20/1/1.3.2 deleted file mode 100644 index 51c00060d4..0000000000 Binary files a/fixture/20/1/1.3.2 and /dev/null differ diff --git a/fixture/20/1/1.3.3 b/fixture/20/1/1.3.3 deleted file mode 100644 index 89f2759df3..0000000000 Binary files a/fixture/20/1/1.3.3 and /dev/null differ diff --git a/fixture/20/2/.zarray b/fixture/20/2/.zarray deleted file mode 100644 index 5738808845..0000000000 --- a/fixture/20/2/.zarray +++ /dev/null @@ -1,21 +0,0 @@ -{ - "chunks": [ - 100, - 3, - 3 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "0ɤ,f3|Gc>+Hc%X>3ֲlc;;.+v[8'8).s\W~:7x#/<#GI9'yC^Q(AIJ2P2TթAMjј&4iEKZE':Ӆtmޡ;= =3 c8L`"@"sˇ|<1f kY|z6]|Wf_ {qN=?pSO WƯun;p[< OyƿG@)H.r/#?(HIJ2P2PԤC(uiN -ZҊִ-toAOzћh3 c8`$q3La* $2$Lrr )@A -Q@(MR`B(O*RԢ6u.O hI+ZӆӎDIwzГ^&>%~ '$xF11%D1$LRHe Ybe,gigD:lf [^9Qq3E.q+]q<O&2d#;9I -RDRBy*PJT -UF(uG}h@Cј&%v'H:Б(:ћhЗџ dq3ьa,&D23I -b6s b5kH'la+v0G81;s\"~r>x#/,!z'ANyr@(BQQ(AIJQJT -UƫFujF҈4)hN "BWM \ No newline at end of file diff --git a/fixture/21/1/0.0.2 b/fixture/21/1/0.0.2 deleted file mode 100644 index 81a1664ada..0000000000 Binary files a/fixture/21/1/0.0.2 and /dev/null differ diff --git a/fixture/21/1/0.0.3 b/fixture/21/1/0.0.3 deleted file mode 100644 index e6715069bb..0000000000 Binary files a/fixture/21/1/0.0.3 and /dev/null differ diff --git a/fixture/21/1/0.1.0 b/fixture/21/1/0.1.0 deleted file mode 100644 index 1d7afd43be..0000000000 Binary files a/fixture/21/1/0.1.0 and /dev/null differ diff --git a/fixture/21/1/0.1.1 b/fixture/21/1/0.1.1 deleted file mode 100644 index eb6ba3e12d..0000000000 Binary files a/fixture/21/1/0.1.1 and /dev/null differ diff --git a/fixture/21/1/0.1.2 b/fixture/21/1/0.1.2 deleted file mode 100644 index ad7a0df808..0000000000 Binary files a/fixture/21/1/0.1.2 and /dev/null differ diff --git a/fixture/21/1/0.1.3 b/fixture/21/1/0.1.3 deleted file mode 100644 index 4e2f9caff1..0000000000 Binary files a/fixture/21/1/0.1.3 and /dev/null differ diff --git a/fixture/21/1/0.2.0 b/fixture/21/1/0.2.0 deleted file mode 100644 index 16b1382692..0000000000 Binary files a/fixture/21/1/0.2.0 and /dev/null differ diff --git a/fixture/21/1/0.2.1 b/fixture/21/1/0.2.1 deleted file mode 100644 index e76a1ed58f..0000000000 Binary files a/fixture/21/1/0.2.1 and /dev/null differ diff --git a/fixture/21/1/0.2.2 b/fixture/21/1/0.2.2 deleted file mode 100644 index 5028948a1c..0000000000 Binary files a/fixture/21/1/0.2.2 and /dev/null differ diff --git a/fixture/21/1/0.2.3 b/fixture/21/1/0.2.3 deleted file mode 100644 index cac892ae90..0000000000 Binary files a/fixture/21/1/0.2.3 and /dev/null differ diff --git a/fixture/21/1/0.3.0 b/fixture/21/1/0.3.0 deleted file mode 100644 index 5d16a9164b..0000000000 Binary files a/fixture/21/1/0.3.0 and /dev/null differ diff --git 
a/fixture/21/1/0.3.1 b/fixture/21/1/0.3.1 deleted file mode 100644 index b8fd44b7e1..0000000000 Binary files a/fixture/21/1/0.3.1 and /dev/null differ diff --git a/fixture/21/1/0.3.2 b/fixture/21/1/0.3.2 deleted file mode 100644 index 397044926b..0000000000 Binary files a/fixture/21/1/0.3.2 and /dev/null differ diff --git a/fixture/21/1/0.3.3 b/fixture/21/1/0.3.3 deleted file mode 100644 index 3c0d13d712..0000000000 --- a/fixture/21/1/0.3.3 +++ /dev/null @@ -1 +0,0 @@ -x%VP@GeXqKǥqt\:{{8)UPIPK4HʹJd蠓.顗>`!a1ƙ`)a9Y`%Ya5`-a=9#93ι+;y'y7/TRKPHŔŁ8@q ā8Yk \ No newline at end of file diff --git a/fixture/21/1/1.0.0 b/fixture/21/1/1.0.0 deleted file mode 100644 index 6d025242f1..0000000000 Binary files a/fixture/21/1/1.0.0 and /dev/null differ diff --git a/fixture/21/1/1.0.1 b/fixture/21/1/1.0.1 deleted file mode 100644 index f3514167c8..0000000000 Binary files a/fixture/21/1/1.0.1 and /dev/null differ diff --git a/fixture/21/1/1.0.2 b/fixture/21/1/1.0.2 deleted file mode 100644 index 77222f05cd..0000000000 Binary files a/fixture/21/1/1.0.2 and /dev/null differ diff --git a/fixture/21/1/1.0.3 b/fixture/21/1/1.0.3 deleted file mode 100644 index a0bb99cde7..0000000000 Binary files a/fixture/21/1/1.0.3 and /dev/null differ diff --git a/fixture/21/1/1.1.0 b/fixture/21/1/1.1.0 deleted file mode 100644 index cb1fb9c5a6..0000000000 Binary files a/fixture/21/1/1.1.0 and /dev/null differ diff --git a/fixture/21/1/1.1.1 b/fixture/21/1/1.1.1 deleted file mode 100644 index 02eaaa3bf6..0000000000 Binary files a/fixture/21/1/1.1.1 and /dev/null differ diff --git a/fixture/21/1/1.1.2 b/fixture/21/1/1.1.2 deleted file mode 100644 index eaad046efd..0000000000 Binary files a/fixture/21/1/1.1.2 and /dev/null differ diff --git a/fixture/21/1/1.1.3 b/fixture/21/1/1.1.3 deleted file mode 100644 index 51643ee8e5..0000000000 Binary files a/fixture/21/1/1.1.3 and /dev/null differ diff --git a/fixture/21/1/1.2.0 b/fixture/21/1/1.2.0 deleted file mode 100644 index b0ab313318..0000000000 Binary files a/fixture/21/1/1.2.0 and /dev/null differ diff --git a/fixture/21/1/1.2.1 b/fixture/21/1/1.2.1 deleted file mode 100644 index d95fa9baf9..0000000000 Binary files a/fixture/21/1/1.2.1 and /dev/null differ diff --git a/fixture/21/1/1.2.2 b/fixture/21/1/1.2.2 deleted file mode 100644 index 613b89e2f2..0000000000 Binary files a/fixture/21/1/1.2.2 and /dev/null differ diff --git a/fixture/21/1/1.2.3 b/fixture/21/1/1.2.3 deleted file mode 100644 index e8ebc253b6..0000000000 Binary files a/fixture/21/1/1.2.3 and /dev/null differ diff --git a/fixture/21/1/1.3.0 b/fixture/21/1/1.3.0 deleted file mode 100644 index a5ecc850a2..0000000000 Binary files a/fixture/21/1/1.3.0 and /dev/null differ diff --git a/fixture/21/1/1.3.1 b/fixture/21/1/1.3.1 deleted file mode 100644 index 3e220e32a4..0000000000 Binary files a/fixture/21/1/1.3.1 and /dev/null differ diff --git a/fixture/21/1/1.3.2 b/fixture/21/1/1.3.2 deleted file mode 100644 index 43c2585dbc..0000000000 Binary files a/fixture/21/1/1.3.2 and /dev/null differ diff --git a/fixture/21/1/1.3.3 b/fixture/21/1/1.3.3 deleted file mode 100644 index 77b54c165a..0000000000 Binary files a/fixture/21/1/1.3.3 and /dev/null differ diff --git a/fixture/21/2/.zarray b/fixture/21/2/.zarray deleted file mode 100644 index 9401811b6e..0000000000 --- a/fixture/21/2/.zarray +++ /dev/null @@ -1,21 +0,0 @@ -{ - "chunks": [ - 10, - 30, - 3 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "ƕعķ8MUw>oS]q9x#6oD6vq /Yk?ꎝQ\Sy|qwN@4㸈 43 diff --git a/fixture/22/1/0.0.1.0 b/fixture/22/1/0.0.1.0 deleted 
file mode 100644 index a25bc6a451..0000000000 Binary files a/fixture/22/1/0.0.1.0 and /dev/null differ diff --git a/fixture/22/1/0.0.1.1 b/fixture/22/1/0.0.1.1 deleted file mode 100644 index 42f4456bcc..0000000000 Binary files a/fixture/22/1/0.0.1.1 and /dev/null differ diff --git a/fixture/22/1/0.0.1.2 b/fixture/22/1/0.0.1.2 deleted file mode 100644 index f3d1405e2e..0000000000 Binary files a/fixture/22/1/0.0.1.2 and /dev/null differ diff --git a/fixture/22/1/0.0.1.3 b/fixture/22/1/0.0.1.3 deleted file mode 100644 index a64124fb02..0000000000 --- a/fixture/22/1/0.0.1.3 +++ /dev/null @@ -1,4 +0,0 @@ -xM1(auN!td0`$ L I&$d0$$Ip$`d1wz3L*Gt\p7 -Ohm .Eo=~L?.f -S|q-#~Ŏ،  >F -lglp-\3[by+x'=pOFgRlV:~ϸ3Kx:9/ Ptq3 \ No newline at end of file diff --git a/fixture/22/1/0.0.2.0 b/fixture/22/1/0.0.2.0 deleted file mode 100644 index b3a0fe53fb..0000000000 Binary files a/fixture/22/1/0.0.2.0 and /dev/null differ diff --git a/fixture/22/1/0.0.2.1 b/fixture/22/1/0.0.2.1 deleted file mode 100644 index ba298400be..0000000000 Binary files a/fixture/22/1/0.0.2.1 and /dev/null differ diff --git a/fixture/22/1/0.0.2.2 b/fixture/22/1/0.0.2.2 deleted file mode 100644 index 8ef05933d1..0000000000 Binary files a/fixture/22/1/0.0.2.2 and /dev/null differ diff --git a/fixture/22/1/0.0.2.3 b/fixture/22/1/0.0.2.3 deleted file mode 100644 index d653bb051c..0000000000 Binary files a/fixture/22/1/0.0.2.3 and /dev/null differ diff --git a/fixture/22/1/0.0.3.0 b/fixture/22/1/0.0.3.0 deleted file mode 100644 index b04cb5d95e..0000000000 Binary files a/fixture/22/1/0.0.3.0 and /dev/null differ diff --git a/fixture/22/1/0.0.3.1 b/fixture/22/1/0.0.3.1 deleted file mode 100644 index 6782af04ed..0000000000 Binary files a/fixture/22/1/0.0.3.1 and /dev/null differ diff --git a/fixture/22/1/0.0.3.2 b/fixture/22/1/0.0.3.2 deleted file mode 100644 index 3614c1ba78..0000000000 Binary files a/fixture/22/1/0.0.3.2 and /dev/null differ diff --git a/fixture/22/1/0.0.3.3 b/fixture/22/1/0.0.3.3 deleted file mode 100644 index de707a4ed7..0000000000 Binary files a/fixture/22/1/0.0.3.3 and /dev/null differ diff --git a/fixture/22/1/0.1.0.0 b/fixture/22/1/0.1.0.0 deleted file mode 100644 index a2eeb93a20..0000000000 Binary files a/fixture/22/1/0.1.0.0 and /dev/null differ diff --git a/fixture/22/1/0.1.0.1 b/fixture/22/1/0.1.0.1 deleted file mode 100644 index e56702b7dc..0000000000 Binary files a/fixture/22/1/0.1.0.1 and /dev/null differ diff --git a/fixture/22/1/0.1.0.2 b/fixture/22/1/0.1.0.2 deleted file mode 100644 index 705b3e4625..0000000000 Binary files a/fixture/22/1/0.1.0.2 and /dev/null differ diff --git a/fixture/22/1/0.1.0.3 b/fixture/22/1/0.1.0.3 deleted file mode 100644 index 428ce7748e..0000000000 --- a/fixture/22/1/0.1.0.3 +++ /dev/null @@ -1 +0,0 @@ -xM1(.]ʕ+W.An dn0H2I7$ L`L K&I%$eM_RK*q<?֚T85;?8_8x_q=~}㖎W,^FWt\FfO|UOG\0.M/z%"Ӎx 鏞}|/Nv*^ 7X!$$Z.g?03 \ No newline at end of file diff --git a/fixture/22/1/0.1.1.0 b/fixture/22/1/0.1.1.0 deleted file mode 100644 index 0de0db05a2..0000000000 Binary files a/fixture/22/1/0.1.1.0 and /dev/null differ diff --git a/fixture/22/1/0.1.1.1 b/fixture/22/1/0.1.1.1 deleted file mode 100644 index 6de9b36307..0000000000 Binary files a/fixture/22/1/0.1.1.1 and /dev/null differ diff --git a/fixture/22/1/0.1.1.2 b/fixture/22/1/0.1.1.2 deleted file mode 100644 index ddd0e73d69..0000000000 Binary files a/fixture/22/1/0.1.1.2 and /dev/null differ diff --git a/fixture/22/1/0.1.1.3 b/fixture/22/1/0.1.1.3 deleted file mode 100644 index 4560d14cf0..0000000000 Binary files a/fixture/22/1/0.1.1.3 and /dev/null 
differ diff --git a/fixture/22/1/0.1.2.0 b/fixture/22/1/0.1.2.0 deleted file mode 100644 index d6c4613ac8..0000000000 Binary files a/fixture/22/1/0.1.2.0 and /dev/null differ diff --git a/fixture/22/1/0.1.2.1 b/fixture/22/1/0.1.2.1 deleted file mode 100644 index d4971aa88b..0000000000 Binary files a/fixture/22/1/0.1.2.1 and /dev/null differ diff --git a/fixture/22/1/0.1.2.2 b/fixture/22/1/0.1.2.2 deleted file mode 100644 index f96c8d124d..0000000000 Binary files a/fixture/22/1/0.1.2.2 and /dev/null differ diff --git a/fixture/22/1/0.1.2.3 b/fixture/22/1/0.1.2.3 deleted file mode 100644 index abba8e8445..0000000000 Binary files a/fixture/22/1/0.1.2.3 and /dev/null differ diff --git a/fixture/22/1/0.1.3.0 b/fixture/22/1/0.1.3.0 deleted file mode 100644 index 96d51927fd..0000000000 Binary files a/fixture/22/1/0.1.3.0 and /dev/null differ diff --git a/fixture/22/1/0.1.3.1 b/fixture/22/1/0.1.3.1 deleted file mode 100644 index b00a9c93cc..0000000000 Binary files a/fixture/22/1/0.1.3.1 and /dev/null differ diff --git a/fixture/22/1/0.1.3.2 b/fixture/22/1/0.1.3.2 deleted file mode 100644 index 827cfb46de..0000000000 Binary files a/fixture/22/1/0.1.3.2 and /dev/null differ diff --git a/fixture/22/1/0.1.3.3 b/fixture/22/1/0.1.3.3 deleted file mode 100644 index 7a2b49c66a..0000000000 Binary files a/fixture/22/1/0.1.3.3 and /dev/null differ diff --git a/fixture/22/1/0.2.0.0 b/fixture/22/1/0.2.0.0 deleted file mode 100644 index 139772498c..0000000000 Binary files a/fixture/22/1/0.2.0.0 and /dev/null differ diff --git a/fixture/22/1/0.2.0.1 b/fixture/22/1/0.2.0.1 deleted file mode 100644 index 669013241e..0000000000 Binary files a/fixture/22/1/0.2.0.1 and /dev/null differ diff --git a/fixture/22/1/0.2.0.2 b/fixture/22/1/0.2.0.2 deleted file mode 100644 index 9f6313e3a0..0000000000 Binary files a/fixture/22/1/0.2.0.2 and /dev/null differ diff --git a/fixture/22/1/0.2.0.3 b/fixture/22/1/0.2.0.3 deleted file mode 100644 index f2268aed5a..0000000000 --- a/fixture/22/1/0.2.0.3 +++ /dev/null @@ -1,2 +0,0 @@ -xM1(P;ʅSGБp$ 7d0p 2d0`L I&$tAI2 IAyӧ}_)8+x=~£F&1p%6x ]y\h$U|O/Nَq?po]?7Nv_ie7-~/=}?<\g -C6: Of>>5| ù،;sq`l:q/5\pr8E\2 \ No newline at end of file diff --git a/fixture/22/1/0.2.1.0 b/fixture/22/1/0.2.1.0 deleted file mode 100644 index 0f9b103087..0000000000 Binary files a/fixture/22/1/0.2.1.0 and /dev/null differ diff --git a/fixture/22/1/0.2.1.1 b/fixture/22/1/0.2.1.1 deleted file mode 100644 index 3d86981a9e..0000000000 Binary files a/fixture/22/1/0.2.1.1 and /dev/null differ diff --git a/fixture/22/1/0.2.1.2 b/fixture/22/1/0.2.1.2 deleted file mode 100644 index 9411b4d7e4..0000000000 Binary files a/fixture/22/1/0.2.1.2 and /dev/null differ diff --git a/fixture/22/1/0.2.1.3 b/fixture/22/1/0.2.1.3 deleted file mode 100644 index 5c74d709aa..0000000000 --- a/fixture/22/1/0.2.1.3 +++ /dev/null @@ -1,2 +0,0 @@ -xM1(`qE!#Jp$`dL 2d 2n0I&$tAI $}+%jo*x.Y$2fo -~/x!:xbWq_[ܕNlsl<.c|pcK4q-#x>o 5 ?*쎿~p'xWc3s2D?،,Os/qj868.N4c \ No newline at end of file diff --git a/fixture/22/1/0.2.2.0 b/fixture/22/1/0.2.2.0 deleted file mode 100644 index cdfdde98bd..0000000000 Binary files a/fixture/22/1/0.2.2.0 and /dev/null differ diff --git a/fixture/22/1/0.2.2.1 b/fixture/22/1/0.2.2.1 deleted file mode 100644 index b244b2247e..0000000000 Binary files a/fixture/22/1/0.2.2.1 and /dev/null differ diff --git a/fixture/22/1/0.2.2.2 b/fixture/22/1/0.2.2.2 deleted file mode 100644 index bf7e5d7563..0000000000 Binary files a/fixture/22/1/0.2.2.2 and /dev/null differ diff --git a/fixture/22/1/0.2.2.3 b/fixture/22/1/0.2.2.3 deleted file 
mode 100644 index acec461696..0000000000 --- a/fixture/22/1/0.2.2.3 +++ /dev/null @@ -1 +0,0 @@ -xM1(`;B\\uԑIp$`dd0\2I&  2 2d0`L I&$};nH$x-\n>>5\Kt2.f*~ox<I<Yko=Φ3Gq36xs|ƽ8'3o gpom\Əw6íq_+\4',.S|+6c3 x,3 \ No newline at end of file diff --git a/fixture/22/1/0.2.3.0 b/fixture/22/1/0.2.3.0 deleted file mode 100644 index 0d7a005610..0000000000 Binary files a/fixture/22/1/0.2.3.0 and /dev/null differ diff --git a/fixture/22/1/0.2.3.1 b/fixture/22/1/0.2.3.1 deleted file mode 100644 index bcc1fa9083..0000000000 Binary files a/fixture/22/1/0.2.3.1 and /dev/null differ diff --git a/fixture/22/1/0.2.3.2 b/fixture/22/1/0.2.3.2 deleted file mode 100644 index 49c59ffb38..0000000000 Binary files a/fixture/22/1/0.2.3.2 and /dev/null differ diff --git a/fixture/22/1/0.2.3.3 b/fixture/22/1/0.2.3.3 deleted file mode 100644 index f9500e9127..0000000000 Binary files a/fixture/22/1/0.2.3.3 and /dev/null differ diff --git a/fixture/22/1/0.3.0.0 b/fixture/22/1/0.3.0.0 deleted file mode 100644 index e015279f47..0000000000 Binary files a/fixture/22/1/0.3.0.0 and /dev/null differ diff --git a/fixture/22/1/0.3.0.1 b/fixture/22/1/0.3.0.1 deleted file mode 100644 index 19b542b76b..0000000000 Binary files a/fixture/22/1/0.3.0.1 and /dev/null differ diff --git a/fixture/22/1/0.3.0.2 b/fixture/22/1/0.3.0.2 deleted file mode 100644 index 9ef533faf9..0000000000 Binary files a/fixture/22/1/0.3.0.2 and /dev/null differ diff --git a/fixture/22/1/0.3.0.3 b/fixture/22/1/0.3.0.3 deleted file mode 100644 index d9172b1b5c..0000000000 --- a/fixture/22/1/0.3.0.3 +++ /dev/null @@ -1 +0,0 @@ -x!AaFqp`ffccl77 ˂, ,Ȳ!˂, ,ش{NyOVEnQȴ;"Ӳ~%eL>JeL!\~#d+Sdz?L:g2E?w \ No newline at end of file diff --git a/fixture/22/1/0.3.1.0 b/fixture/22/1/0.3.1.0 deleted file mode 100644 index d0b2cb0772..0000000000 Binary files a/fixture/22/1/0.3.1.0 and /dev/null differ diff --git a/fixture/22/1/0.3.1.1 b/fixture/22/1/0.3.1.1 deleted file mode 100644 index 16c06afc64..0000000000 Binary files a/fixture/22/1/0.3.1.1 and /dev/null differ diff --git a/fixture/22/1/0.3.1.2 b/fixture/22/1/0.3.1.2 deleted file mode 100644 index 9fb7108772..0000000000 Binary files a/fixture/22/1/0.3.1.2 and /dev/null differ diff --git a/fixture/22/1/0.3.1.3 b/fixture/22/1/0.3.1.3 deleted file mode 100644 index 2571f6116c..0000000000 --- a/fixture/22/1/0.3.1.3 +++ /dev/null @@ -1 +0,0 @@ -xEff'i4 xy!8q\}^ {OB\<7"q$eL1FneL )2*ky#Sdksy)Sd*K)2}fO-ؿL` \ No newline at end of file diff --git a/fixture/22/1/0.3.2.0 b/fixture/22/1/0.3.2.0 deleted file mode 100644 index b66a99fe3c..0000000000 Binary files a/fixture/22/1/0.3.2.0 and /dev/null differ diff --git a/fixture/22/1/0.3.2.1 b/fixture/22/1/0.3.2.1 deleted file mode 100644 index 4d4dcf5f2a..0000000000 Binary files a/fixture/22/1/0.3.2.1 and /dev/null differ diff --git a/fixture/22/1/0.3.2.2 b/fixture/22/1/0.3.2.2 deleted file mode 100644 index 5f8d768b12..0000000000 Binary files a/fixture/22/1/0.3.2.2 and /dev/null differ diff --git a/fixture/22/1/0.3.2.3 b/fixture/22/1/0.3.2.3 deleted file mode 100644 index 8556a301a3..0000000000 --- a/fixture/22/1/0.3.2.3 +++ /dev/null @@ -1 +0,0 @@ -x!AaFq p 7˂, ,Ȳ ˂, CȊv7ygQ ĭw!Sd:w_LcO2Em~'eLZ>)2g\/L3a[Sw \ No newline at end of file diff --git a/fixture/22/1/0.3.3.0 b/fixture/22/1/0.3.3.0 deleted file mode 100644 index 1e599375bc..0000000000 Binary files a/fixture/22/1/0.3.3.0 and /dev/null differ diff --git a/fixture/22/1/0.3.3.1 b/fixture/22/1/0.3.3.1 deleted file mode 100644 index 5e89372761..0000000000 Binary files a/fixture/22/1/0.3.3.1 and /dev/null differ diff --git a/fixture/22/1/0.3.3.2 
b/fixture/22/1/0.3.3.2 deleted file mode 100644 index 51df60bc2e..0000000000 Binary files a/fixture/22/1/0.3.3.2 and /dev/null differ diff --git a/fixture/22/1/0.3.3.3 b/fixture/22/1/0.3.3.3 deleted file mode 100644 index 1a463a4ac7..0000000000 Binary files a/fixture/22/1/0.3.3.3 and /dev/null differ diff --git a/fixture/22/1/1.0.0.0 b/fixture/22/1/1.0.0.0 deleted file mode 100644 index b4f4bec7da..0000000000 Binary files a/fixture/22/1/1.0.0.0 and /dev/null differ diff --git a/fixture/22/1/1.0.0.1 b/fixture/22/1/1.0.0.1 deleted file mode 100644 index 17d1f7dfe1..0000000000 Binary files a/fixture/22/1/1.0.0.1 and /dev/null differ diff --git a/fixture/22/1/1.0.0.2 b/fixture/22/1/1.0.0.2 deleted file mode 100644 index 8f7d154fc1..0000000000 Binary files a/fixture/22/1/1.0.0.2 and /dev/null differ diff --git a/fixture/22/1/1.0.0.3 b/fixture/22/1/1.0.0.3 deleted file mode 100644 index b005dca87a..0000000000 Binary files a/fixture/22/1/1.0.0.3 and /dev/null differ diff --git a/fixture/22/1/1.0.1.0 b/fixture/22/1/1.0.1.0 deleted file mode 100644 index 7ec4e434ba..0000000000 Binary files a/fixture/22/1/1.0.1.0 and /dev/null differ diff --git a/fixture/22/1/1.0.1.1 b/fixture/22/1/1.0.1.1 deleted file mode 100644 index d2794dd4b1..0000000000 Binary files a/fixture/22/1/1.0.1.1 and /dev/null differ diff --git a/fixture/22/1/1.0.1.2 b/fixture/22/1/1.0.1.2 deleted file mode 100644 index 8ac5ad017a..0000000000 Binary files a/fixture/22/1/1.0.1.2 and /dev/null differ diff --git a/fixture/22/1/1.0.1.3 b/fixture/22/1/1.0.1.3 deleted file mode 100644 index 604faaa2fa..0000000000 --- a/fixture/22/1/1.0.1.3 +++ /dev/null @@ -1 +0,0 @@ -xM1(P+%d2`8ĕ+W. LI $`dn0H2IA2I2.$ Lyӧ}_6h\U|Onj$m88+xwp/jW:~Ooܔ _;\ ll4>'_ E?\lzqo]\9×%vKx Wht!¿89og<.Չo-p љ%<7Eo=t1:8#A \ No newline at end of file diff --git a/fixture/22/1/1.0.2.0 b/fixture/22/1/1.0.2.0 deleted file mode 100644 index 9cd3ede04e..0000000000 Binary files a/fixture/22/1/1.0.2.0 and /dev/null differ diff --git a/fixture/22/1/1.0.2.1 b/fixture/22/1/1.0.2.1 deleted file mode 100644 index 575cd0da27..0000000000 Binary files a/fixture/22/1/1.0.2.1 and /dev/null differ diff --git a/fixture/22/1/1.0.2.2 b/fixture/22/1/1.0.2.2 deleted file mode 100644 index 9c50735597..0000000000 Binary files a/fixture/22/1/1.0.2.2 and /dev/null differ diff --git a/fixture/22/1/1.0.2.3 b/fixture/22/1/1.0.2.3 deleted file mode 100644 index e31319533e..0000000000 Binary files a/fixture/22/1/1.0.2.3 and /dev/null differ diff --git a/fixture/22/1/1.0.3.0 b/fixture/22/1/1.0.3.0 deleted file mode 100644 index 8cb94f9044..0000000000 Binary files a/fixture/22/1/1.0.3.0 and /dev/null differ diff --git a/fixture/22/1/1.0.3.1 b/fixture/22/1/1.0.3.1 deleted file mode 100644 index 338f0ff828..0000000000 Binary files a/fixture/22/1/1.0.3.1 and /dev/null differ diff --git a/fixture/22/1/1.0.3.2 b/fixture/22/1/1.0.3.2 deleted file mode 100644 index 716fd23788..0000000000 Binary files a/fixture/22/1/1.0.3.2 and /dev/null differ diff --git a/fixture/22/1/1.0.3.3 b/fixture/22/1/1.0.3.3 deleted file mode 100644 index 8ec8a7accc..0000000000 Binary files a/fixture/22/1/1.0.3.3 and /dev/null differ diff --git a/fixture/22/1/1.1.0.0 b/fixture/22/1/1.1.0.0 deleted file mode 100644 index 9c9955a5bd..0000000000 Binary files a/fixture/22/1/1.1.0.0 and /dev/null differ diff --git a/fixture/22/1/1.1.0.1 b/fixture/22/1/1.1.0.1 deleted file mode 100644 index 3836072634..0000000000 Binary files a/fixture/22/1/1.1.0.1 and /dev/null differ diff --git a/fixture/22/1/1.1.0.2 b/fixture/22/1/1.1.0.2 deleted file mode 
100644 index b2f77aad18..0000000000 Binary files a/fixture/22/1/1.1.0.2 and /dev/null differ diff --git a/fixture/22/1/1.1.0.3 b/fixture/22/1/1.1.0.3 deleted file mode 100644 index 6e9d01f504..0000000000 Binary files a/fixture/22/1/1.1.0.3 and /dev/null differ diff --git a/fixture/22/1/1.1.1.0 b/fixture/22/1/1.1.1.0 deleted file mode 100644 index 2b676a23af..0000000000 Binary files a/fixture/22/1/1.1.1.0 and /dev/null differ diff --git a/fixture/22/1/1.1.1.1 b/fixture/22/1/1.1.1.1 deleted file mode 100644 index 05e34d40ae..0000000000 Binary files a/fixture/22/1/1.1.1.1 and /dev/null differ diff --git a/fixture/22/1/1.1.1.2 b/fixture/22/1/1.1.1.2 deleted file mode 100644 index 88e594c031..0000000000 Binary files a/fixture/22/1/1.1.1.2 and /dev/null differ diff --git a/fixture/22/1/1.1.1.3 b/fixture/22/1/1.1.1.3 deleted file mode 100644 index 3002f080b3..0000000000 Binary files a/fixture/22/1/1.1.1.3 and /dev/null differ diff --git a/fixture/22/1/1.1.2.0 b/fixture/22/1/1.1.2.0 deleted file mode 100644 index fc1e0fa17d..0000000000 Binary files a/fixture/22/1/1.1.2.0 and /dev/null differ diff --git a/fixture/22/1/1.1.2.1 b/fixture/22/1/1.1.2.1 deleted file mode 100644 index 663523a385..0000000000 Binary files a/fixture/22/1/1.1.2.1 and /dev/null differ diff --git a/fixture/22/1/1.1.2.2 b/fixture/22/1/1.1.2.2 deleted file mode 100644 index b4f75d972f..0000000000 Binary files a/fixture/22/1/1.1.2.2 and /dev/null differ diff --git a/fixture/22/1/1.1.2.3 b/fixture/22/1/1.1.2.3 deleted file mode 100644 index b1168f5205..0000000000 Binary files a/fixture/22/1/1.1.2.3 and /dev/null differ diff --git a/fixture/22/1/1.1.3.0 b/fixture/22/1/1.1.3.0 deleted file mode 100644 index 25f5b400af..0000000000 Binary files a/fixture/22/1/1.1.3.0 and /dev/null differ diff --git a/fixture/22/1/1.1.3.1 b/fixture/22/1/1.1.3.1 deleted file mode 100644 index 4c80fc02d5..0000000000 Binary files a/fixture/22/1/1.1.3.1 and /dev/null differ diff --git a/fixture/22/1/1.1.3.2 b/fixture/22/1/1.1.3.2 deleted file mode 100644 index 980b917e89..0000000000 Binary files a/fixture/22/1/1.1.3.2 and /dev/null differ diff --git a/fixture/22/1/1.1.3.3 b/fixture/22/1/1.1.3.3 deleted file mode 100644 index d28b3cc243..0000000000 Binary files a/fixture/22/1/1.1.3.3 and /dev/null differ diff --git a/fixture/22/1/1.2.0.0 b/fixture/22/1/1.2.0.0 deleted file mode 100644 index 288070000e..0000000000 Binary files a/fixture/22/1/1.2.0.0 and /dev/null differ diff --git a/fixture/22/1/1.2.0.1 b/fixture/22/1/1.2.0.1 deleted file mode 100644 index 896e6cec3f..0000000000 Binary files a/fixture/22/1/1.2.0.1 and /dev/null differ diff --git a/fixture/22/1/1.2.0.2 b/fixture/22/1/1.2.0.2 deleted file mode 100644 index 324d767d44..0000000000 Binary files a/fixture/22/1/1.2.0.2 and /dev/null differ diff --git a/fixture/22/1/1.2.0.3 b/fixture/22/1/1.2.0.3 deleted file mode 100644 index 1304af4cc0..0000000000 Binary files a/fixture/22/1/1.2.0.3 and /dev/null differ diff --git a/fixture/22/1/1.2.1.0 b/fixture/22/1/1.2.1.0 deleted file mode 100644 index cef5978395..0000000000 Binary files a/fixture/22/1/1.2.1.0 and /dev/null differ diff --git a/fixture/22/1/1.2.1.1 b/fixture/22/1/1.2.1.1 deleted file mode 100644 index 58e64613d0..0000000000 Binary files a/fixture/22/1/1.2.1.1 and /dev/null differ diff --git a/fixture/22/1/1.2.1.2 b/fixture/22/1/1.2.1.2 deleted file mode 100644 index 4240dc12bd..0000000000 Binary files a/fixture/22/1/1.2.1.2 and /dev/null differ diff --git a/fixture/22/1/1.2.1.3 b/fixture/22/1/1.2.1.3 deleted file mode 100644 index 
9cac51028c..0000000000 Binary files a/fixture/22/1/1.2.1.3 and /dev/null differ diff --git a/fixture/22/1/1.2.2.0 b/fixture/22/1/1.2.2.0 deleted file mode 100644 index 770dfd64fe..0000000000 Binary files a/fixture/22/1/1.2.2.0 and /dev/null differ diff --git a/fixture/22/1/1.2.2.1 b/fixture/22/1/1.2.2.1 deleted file mode 100644 index 8144a94251..0000000000 Binary files a/fixture/22/1/1.2.2.1 and /dev/null differ diff --git a/fixture/22/1/1.2.2.2 b/fixture/22/1/1.2.2.2 deleted file mode 100644 index 7819de9e08..0000000000 Binary files a/fixture/22/1/1.2.2.2 and /dev/null differ diff --git a/fixture/22/1/1.2.2.3 b/fixture/22/1/1.2.2.3 deleted file mode 100644 index b4d098e108..0000000000 Binary files a/fixture/22/1/1.2.2.3 and /dev/null differ diff --git a/fixture/22/1/1.2.3.0 b/fixture/22/1/1.2.3.0 deleted file mode 100644 index 76254ea78b..0000000000 Binary files a/fixture/22/1/1.2.3.0 and /dev/null differ diff --git a/fixture/22/1/1.2.3.1 b/fixture/22/1/1.2.3.1 deleted file mode 100644 index 1b26cc2878..0000000000 Binary files a/fixture/22/1/1.2.3.1 and /dev/null differ diff --git a/fixture/22/1/1.2.3.2 b/fixture/22/1/1.2.3.2 deleted file mode 100644 index 3e9c7e2b75..0000000000 Binary files a/fixture/22/1/1.2.3.2 and /dev/null differ diff --git a/fixture/22/1/1.2.3.3 b/fixture/22/1/1.2.3.3 deleted file mode 100644 index dea4cd79c7..0000000000 Binary files a/fixture/22/1/1.2.3.3 and /dev/null differ diff --git a/fixture/22/1/1.3.0.0 b/fixture/22/1/1.3.0.0 deleted file mode 100644 index 1267f29539..0000000000 Binary files a/fixture/22/1/1.3.0.0 and /dev/null differ diff --git a/fixture/22/1/1.3.0.1 b/fixture/22/1/1.3.0.1 deleted file mode 100644 index f19f044789..0000000000 Binary files a/fixture/22/1/1.3.0.1 and /dev/null differ diff --git a/fixture/22/1/1.3.0.2 b/fixture/22/1/1.3.0.2 deleted file mode 100644 index 59772c916c..0000000000 Binary files a/fixture/22/1/1.3.0.2 and /dev/null differ diff --git a/fixture/22/1/1.3.0.3 b/fixture/22/1/1.3.0.3 deleted file mode 100644 index 85fc7f6e2b..0000000000 --- a/fixture/22/1/1.3.0.3 +++ /dev/null @@ -1 +0,0 @@ -x!AaFqUeAMl ˂, ,Ȳ!˂, 2oyNz*WACG$SdLig72Eѐ})/eL1BM%SdL9K~n w!Sd \ No newline at end of file diff --git a/fixture/22/1/1.3.1.0 b/fixture/22/1/1.3.1.0 deleted file mode 100644 index 022e119918..0000000000 Binary files a/fixture/22/1/1.3.1.0 and /dev/null differ diff --git a/fixture/22/1/1.3.1.1 b/fixture/22/1/1.3.1.1 deleted file mode 100644 index 76c7f19ece..0000000000 Binary files a/fixture/22/1/1.3.1.1 and /dev/null differ diff --git a/fixture/22/1/1.3.1.2 b/fixture/22/1/1.3.1.2 deleted file mode 100644 index 0def4f0cf4..0000000000 Binary files a/fixture/22/1/1.3.1.2 and /dev/null differ diff --git a/fixture/22/1/1.3.1.3 b/fixture/22/1/1.3.1.3 deleted file mode 100644 index 7d7d7bb695..0000000000 --- a/fixture/22/1/1.3.1.3 +++ /dev/null @@ -1 +0,0 @@ -x!AaFqEd&۵ndYeAYdهeA9yo} Z{/ B\G|p2EӘYS(SdoLi9g_k"Ӱ`?2Enɾ'eL}{;2LGGs \ No newline at end of file diff --git a/fixture/22/1/1.3.2.0 b/fixture/22/1/1.3.2.0 deleted file mode 100644 index 0ae0915622..0000000000 Binary files a/fixture/22/1/1.3.2.0 and /dev/null differ diff --git a/fixture/22/1/1.3.2.1 b/fixture/22/1/1.3.2.1 deleted file mode 100644 index d697b38934..0000000000 Binary files a/fixture/22/1/1.3.2.1 and /dev/null differ diff --git a/fixture/22/1/1.3.2.2 b/fixture/22/1/1.3.2.2 deleted file mode 100644 index 7f921ebdfc..0000000000 Binary files a/fixture/22/1/1.3.2.2 and /dev/null differ diff --git a/fixture/22/1/1.3.2.3 b/fixture/22/1/1.3.2.3 deleted file mode 100644 index 
7f79d8c9cd..0000000000 --- a/fixture/22/1/1.3.2.3 +++ /dev/null @@ -1 +0,0 @@ -x!AFa,Ț 66r 7˂, , ,Ȳs' Z}!#L>ak"e*dL|)2U { Sd,r)SdLeߗ2E}Gϭ \ No newline at end of file diff --git a/fixture/22/1/1.3.3.0 b/fixture/22/1/1.3.3.0 deleted file mode 100644 index 4751c02b60..0000000000 Binary files a/fixture/22/1/1.3.3.0 and /dev/null differ diff --git a/fixture/22/1/1.3.3.1 b/fixture/22/1/1.3.3.1 deleted file mode 100644 index 4d6ffd8ce9..0000000000 Binary files a/fixture/22/1/1.3.3.1 and /dev/null differ diff --git a/fixture/22/1/1.3.3.2 b/fixture/22/1/1.3.3.2 deleted file mode 100644 index b3a45852a4..0000000000 Binary files a/fixture/22/1/1.3.3.2 and /dev/null differ diff --git a/fixture/22/1/1.3.3.3 b/fixture/22/1/1.3.3.3 deleted file mode 100644 index 376561b1e0..0000000000 Binary files a/fixture/22/1/1.3.3.3 and /dev/null differ diff --git a/fixture/22/2/.zarray b/fixture/22/2/.zarray deleted file mode 100644 index 88ed80efcc..0000000000 --- a/fixture/22/2/.zarray +++ /dev/null @@ -1,23 +0,0 @@ -{ - "chunks": [ - 10, - 3, - 3, - 3 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "ѵ>~#F?pǜpJ3IiY昧e*,]3\ppUDˬPg5ɥ@QgILD \ No newline at end of file diff --git a/fixture/23/1/0.3.0.2 b/fixture/23/1/0.3.0.2 deleted file mode 100644 index 2b2c4141cf..0000000000 --- a/fixture/23/1/0.3.0.2 +++ /dev/null @@ -1 +0,0 @@ -xM`϶X"(Xb6*m6v@=>{{OSZq\qMt+VYc 6b:;D7\L)1d"zd}8Uȑ@iJ!Wo'_|ӡKK0K*s̳"5rJ}3 CDwc6w#Oi] Hv8qƃ|c 3lJ#A 9 -tt \ No newline at end of file diff --git a/fixture/23/1/0.3.3.3 b/fixture/23/1/0.3.3.3 deleted file mode 100644 index 668895bf37..0000000000 Binary files a/fixture/23/1/0.3.3.3 and /dev/null differ diff --git a/fixture/23/1/1.0.0.0 b/fixture/23/1/1.0.0.0 deleted file mode 100644 index fc770b2b08..0000000000 Binary files a/fixture/23/1/1.0.0.0 and /dev/null differ diff --git a/fixture/23/1/1.0.0.1 b/fixture/23/1/1.0.0.1 deleted file mode 100644 index 35bde18a3c..0000000000 Binary files a/fixture/23/1/1.0.0.1 and /dev/null differ diff --git a/fixture/23/1/1.0.0.2 b/fixture/23/1/1.0.0.2 deleted file mode 100644 index ec2904ed32..0000000000 Binary files a/fixture/23/1/1.0.0.2 and /dev/null differ diff --git a/fixture/23/1/1.0.0.3 b/fixture/23/1/1.0.0.3 deleted file mode 100644 index 2b3c6b999d..0000000000 Binary files a/fixture/23/1/1.0.0.3 and /dev/null differ diff --git a/fixture/23/1/1.0.1.0 b/fixture/23/1/1.0.1.0 deleted file mode 100644 index bf299471af..0000000000 Binary files a/fixture/23/1/1.0.1.0 and /dev/null differ diff --git a/fixture/23/1/1.0.1.1 b/fixture/23/1/1.0.1.1 deleted file mode 100644 index 30167a5788..0000000000 Binary files a/fixture/23/1/1.0.1.1 and /dev/null differ diff --git a/fixture/23/1/1.0.1.2 b/fixture/23/1/1.0.1.2 deleted file mode 100644 index 862e662ca1..0000000000 Binary files a/fixture/23/1/1.0.1.2 and /dev/null differ diff --git a/fixture/23/1/1.0.1.3 b/fixture/23/1/1.0.1.3 deleted file mode 100644 index 8b83810e65..0000000000 Binary files a/fixture/23/1/1.0.1.3 and /dev/null differ diff --git a/fixture/23/1/1.0.2.0 b/fixture/23/1/1.0.2.0 deleted file mode 100644 index f12a0c7038..0000000000 Binary files a/fixture/23/1/1.0.2.0 and /dev/null differ diff --git a/fixture/23/1/1.0.2.1 b/fixture/23/1/1.0.2.1 deleted file mode 100644 index d65e47ec8f..0000000000 Binary files a/fixture/23/1/1.0.2.1 and /dev/null differ diff --git a/fixture/23/1/1.0.2.2 b/fixture/23/1/1.0.2.2 deleted file mode 100644 index aeeba5031d..0000000000 Binary files a/fixture/23/1/1.0.2.2 and /dev/null differ diff --git a/fixture/23/1/1.0.2.3 b/fixture/23/1/1.0.2.3 
deleted file mode 100644 index 1b2fb6b86c..0000000000 Binary files a/fixture/23/1/1.0.2.3 and /dev/null differ diff --git a/fixture/23/1/1.0.3.0 b/fixture/23/1/1.0.3.0 deleted file mode 100644 index bb01a5b4f0..0000000000 Binary files a/fixture/23/1/1.0.3.0 and /dev/null differ diff --git a/fixture/23/1/1.0.3.1 b/fixture/23/1/1.0.3.1 deleted file mode 100644 index f7bdc6937a..0000000000 Binary files a/fixture/23/1/1.0.3.1 and /dev/null differ diff --git a/fixture/23/1/1.0.3.2 b/fixture/23/1/1.0.3.2 deleted file mode 100644 index 2e7553923f..0000000000 Binary files a/fixture/23/1/1.0.3.2 and /dev/null differ diff --git a/fixture/23/1/1.0.3.3 b/fixture/23/1/1.0.3.3 deleted file mode 100644 index 6a0ad4ffeb..0000000000 Binary files a/fixture/23/1/1.0.3.3 and /dev/null differ diff --git a/fixture/23/1/1.1.0.0 b/fixture/23/1/1.1.0.0 deleted file mode 100644 index 11919bdcff..0000000000 Binary files a/fixture/23/1/1.1.0.0 and /dev/null differ diff --git a/fixture/23/1/1.1.0.1 b/fixture/23/1/1.1.0.1 deleted file mode 100644 index 23388a5167..0000000000 Binary files a/fixture/23/1/1.1.0.1 and /dev/null differ diff --git a/fixture/23/1/1.1.0.2 b/fixture/23/1/1.1.0.2 deleted file mode 100644 index 05ac8f1ee8..0000000000 Binary files a/fixture/23/1/1.1.0.2 and /dev/null differ diff --git a/fixture/23/1/1.1.0.3 b/fixture/23/1/1.1.0.3 deleted file mode 100644 index c3a1a69eb4..0000000000 Binary files a/fixture/23/1/1.1.0.3 and /dev/null differ diff --git a/fixture/23/1/1.1.1.0 b/fixture/23/1/1.1.1.0 deleted file mode 100644 index 2efef30458..0000000000 Binary files a/fixture/23/1/1.1.1.0 and /dev/null differ diff --git a/fixture/23/1/1.1.1.1 b/fixture/23/1/1.1.1.1 deleted file mode 100644 index bd8dd14402..0000000000 Binary files a/fixture/23/1/1.1.1.1 and /dev/null differ diff --git a/fixture/23/1/1.1.1.2 b/fixture/23/1/1.1.1.2 deleted file mode 100644 index 085e779f5a..0000000000 Binary files a/fixture/23/1/1.1.1.2 and /dev/null differ diff --git a/fixture/23/1/1.1.1.3 b/fixture/23/1/1.1.1.3 deleted file mode 100644 index dd7567734d..0000000000 Binary files a/fixture/23/1/1.1.1.3 and /dev/null differ diff --git a/fixture/23/1/1.1.2.0 b/fixture/23/1/1.1.2.0 deleted file mode 100644 index 63dc924e74..0000000000 Binary files a/fixture/23/1/1.1.2.0 and /dev/null differ diff --git a/fixture/23/1/1.1.2.1 b/fixture/23/1/1.1.2.1 deleted file mode 100644 index 7c5445afac..0000000000 Binary files a/fixture/23/1/1.1.2.1 and /dev/null differ diff --git a/fixture/23/1/1.1.2.2 b/fixture/23/1/1.1.2.2 deleted file mode 100644 index 27fcd08b92..0000000000 Binary files a/fixture/23/1/1.1.2.2 and /dev/null differ diff --git a/fixture/23/1/1.1.2.3 b/fixture/23/1/1.1.2.3 deleted file mode 100644 index 0b92f63d18..0000000000 Binary files a/fixture/23/1/1.1.2.3 and /dev/null differ diff --git a/fixture/23/1/1.1.3.0 b/fixture/23/1/1.1.3.0 deleted file mode 100644 index b5d5d75829..0000000000 Binary files a/fixture/23/1/1.1.3.0 and /dev/null differ diff --git a/fixture/23/1/1.1.3.1 b/fixture/23/1/1.1.3.1 deleted file mode 100644 index b81b7bb917..0000000000 Binary files a/fixture/23/1/1.1.3.1 and /dev/null differ diff --git a/fixture/23/1/1.1.3.2 b/fixture/23/1/1.1.3.2 deleted file mode 100644 index 6acf2383f4..0000000000 Binary files a/fixture/23/1/1.1.3.2 and /dev/null differ diff --git a/fixture/23/1/1.1.3.3 b/fixture/23/1/1.1.3.3 deleted file mode 100644 index 3f1c37fd81..0000000000 Binary files a/fixture/23/1/1.1.3.3 and /dev/null differ diff --git a/fixture/23/1/1.2.0.0 b/fixture/23/1/1.2.0.0 deleted file mode 100644 
index 21c3bba181..0000000000 Binary files a/fixture/23/1/1.2.0.0 and /dev/null differ diff --git a/fixture/23/1/1.2.0.1 b/fixture/23/1/1.2.0.1 deleted file mode 100644 index 730003bad1..0000000000 Binary files a/fixture/23/1/1.2.0.1 and /dev/null differ diff --git a/fixture/23/1/1.2.0.2 b/fixture/23/1/1.2.0.2 deleted file mode 100644 index bc46ae016c..0000000000 Binary files a/fixture/23/1/1.2.0.2 and /dev/null differ diff --git a/fixture/23/1/1.2.0.3 b/fixture/23/1/1.2.0.3 deleted file mode 100644 index 4fc192ae12..0000000000 Binary files a/fixture/23/1/1.2.0.3 and /dev/null differ diff --git a/fixture/23/1/1.2.1.0 b/fixture/23/1/1.2.1.0 deleted file mode 100644 index 00d512da18..0000000000 Binary files a/fixture/23/1/1.2.1.0 and /dev/null differ diff --git a/fixture/23/1/1.2.1.1 b/fixture/23/1/1.2.1.1 deleted file mode 100644 index 120c47c4c2..0000000000 Binary files a/fixture/23/1/1.2.1.1 and /dev/null differ diff --git a/fixture/23/1/1.2.1.2 b/fixture/23/1/1.2.1.2 deleted file mode 100644 index 6d368743d6..0000000000 Binary files a/fixture/23/1/1.2.1.2 and /dev/null differ diff --git a/fixture/23/1/1.2.1.3 b/fixture/23/1/1.2.1.3 deleted file mode 100644 index a3fee206b1..0000000000 Binary files a/fixture/23/1/1.2.1.3 and /dev/null differ diff --git a/fixture/23/1/1.2.2.0 b/fixture/23/1/1.2.2.0 deleted file mode 100644 index 216788e3ca..0000000000 Binary files a/fixture/23/1/1.2.2.0 and /dev/null differ diff --git a/fixture/23/1/1.2.2.1 b/fixture/23/1/1.2.2.1 deleted file mode 100644 index d04a4231f3..0000000000 Binary files a/fixture/23/1/1.2.2.1 and /dev/null differ diff --git a/fixture/23/1/1.2.2.2 b/fixture/23/1/1.2.2.2 deleted file mode 100644 index 33ae8e0153..0000000000 Binary files a/fixture/23/1/1.2.2.2 and /dev/null differ diff --git a/fixture/23/1/1.2.2.3 b/fixture/23/1/1.2.2.3 deleted file mode 100644 index 9b3dc9b7e6..0000000000 Binary files a/fixture/23/1/1.2.2.3 and /dev/null differ diff --git a/fixture/23/1/1.2.3.0 b/fixture/23/1/1.2.3.0 deleted file mode 100644 index 0de827b6c5..0000000000 Binary files a/fixture/23/1/1.2.3.0 and /dev/null differ diff --git a/fixture/23/1/1.2.3.1 b/fixture/23/1/1.2.3.1 deleted file mode 100644 index b687a3d431..0000000000 Binary files a/fixture/23/1/1.2.3.1 and /dev/null differ diff --git a/fixture/23/1/1.2.3.2 b/fixture/23/1/1.2.3.2 deleted file mode 100644 index e50f99a83a..0000000000 Binary files a/fixture/23/1/1.2.3.2 and /dev/null differ diff --git a/fixture/23/1/1.2.3.3 b/fixture/23/1/1.2.3.3 deleted file mode 100644 index 8014aa5179..0000000000 --- a/fixture/23/1/1.2.3.3 +++ /dev/null @@ -1 +0,0 @@ -x P@M~P#ʖid89•wz<x1f×YsH͉T:| \ No newline at end of file diff --git a/fixture/23/1/1.3.0.0 b/fixture/23/1/1.3.0.0 deleted file mode 100644 index 8b7c6e9377..0000000000 Binary files a/fixture/23/1/1.3.0.0 and /dev/null differ diff --git a/fixture/23/1/1.3.0.1 b/fixture/23/1/1.3.0.1 deleted file mode 100644 index e12851ec39..0000000000 Binary files a/fixture/23/1/1.3.0.1 and /dev/null differ diff --git a/fixture/23/1/1.3.0.2 b/fixture/23/1/1.3.0.2 deleted file mode 100644 index c3edd08ea3..0000000000 --- a/fixture/23/1/1.3.0.2 +++ /dev/null @@ -1 +0,0 @@ -xg2`ON$QR" v( nnp}f;so)rG{xDKi=s!Gt8&|JYrYdeV(P$NN8s.k+DUXB-40#2Fq dmaIfY'^y>b7Y46 \ No newline at end of file diff --git a/fixture/23/1/1.3.0.3 b/fixture/23/1/1.3.0.3 deleted file mode 100644 index 33e57a0ae4..0000000000 Binary files a/fixture/23/1/1.3.0.3 and /dev/null differ diff --git a/fixture/23/1/1.3.1.0 b/fixture/23/1/1.3.1.0 deleted file mode 100644 index e3fab0e36b..0000000000 
--- a/fixture/23/1/1.3.1.0 +++ /dev/null @@ -1 +0,0 @@ -xW2`+qQF!Zd'!Nt lynlJnzK 0Ht6y^yulNimve}(sHt/|/DWǜpT ֔f3,H%%U['B?,*kA u41"Y8 \ No newline at end of file diff --git a/fixture/23/1/1.3.1.1 b/fixture/23/1/1.3.1.1 deleted file mode 100644 index 1c3d2c6049..0000000000 --- a/fixture/23/1/1.3.1.1 +++ /dev/null @@ -1 +0,0 @@ -xQPEW P0vc`ЍYȯg98cN8s.n)2+Rcu4 \JÌ0J,93Nt?l6;4i0$LQ4=|^y\fPeyX$}>~c@tjJ\q nN6R \ No newline at end of file diff --git a/fixture/23/1/1.3.1.2 b/fixture/23/1/1.3.1.2 deleted file mode 100644 index c686f9d5ee..0000000000 Binary files a/fixture/23/1/1.3.1.2 and /dev/null differ diff --git a/fixture/23/1/1.3.1.3 b/fixture/23/1/1.3.1.3 deleted file mode 100644 index d39734f63d..0000000000 Binary files a/fixture/23/1/1.3.1.3 and /dev/null differ diff --git a/fixture/23/1/1.3.2.0 b/fixture/23/1/1.3.2.0 deleted file mode 100644 index 0c7fd416e0..0000000000 --- a/fixture/23/1/1.3.2.0 +++ /dev/null @@ -1 +0,0 @@ -xQBQEkaNsF؍Y1t.{{fN!]C8SRRcYNe[L~@F%Xa5`-.aaFe9Dwgy^i!L2430KtmN>*)59\q D(~ \ No newline at end of file diff --git a/fixture/23/1/1.3.2.1 b/fixture/23/1/1.3.2.1 deleted file mode 100644 index d9ba37a9a8..0000000000 Binary files a/fixture/23/1/1.3.2.1 and /dev/null differ diff --git a/fixture/23/1/1.3.2.2 b/fixture/23/1/1.3.2.2 deleted file mode 100644 index e7c7cd1f2d..0000000000 Binary files a/fixture/23/1/1.3.2.2 and /dev/null differ diff --git a/fixture/23/1/1.3.2.3 b/fixture/23/1/1.3.2.3 deleted file mode 100644 index 5d7cec912f..0000000000 Binary files a/fixture/23/1/1.3.2.3 and /dev/null differ diff --git a/fixture/23/1/1.3.3.0 b/fixture/23/1/1.3.3.0 deleted file mode 100644 index 43e394eabf..0000000000 --- a/fixture/23/1/1.3.3.0 +++ /dev/null @@ -1,2 +0,0 @@ -x7PQRܐrzxߙc`WOڰ 7}A=0cv 1Ȑ@ -.ہ \ No newline at end of file diff --git a/fixture/23/1/1.3.3.1 b/fixture/23/1/1.3.3.1 deleted file mode 100644 index d22239bb91..0000000000 --- a/fixture/23/1/1.3.3.1 +++ /dev/null @@ -1 +0,0 @@ -x7P[J:Jro^wغc 3lR D RdX+iÎ'.xv \ No newline at end of file diff --git a/fixture/23/1/1.3.3.2 b/fixture/23/1/1.3.3.2 deleted file mode 100644 index 017470d76c..0000000000 --- a/fixture/23/1/1.3.3.2 +++ /dev/null @@ -1 +0,0 @@ -xз@Э t7-.y h ;p߁:c 3X~"H"C%*X~~ \ No newline at end of file diff --git a/fixture/23/1/1.3.3.3 b/fixture/23/1/1.3.3.3 deleted file mode 100644 index 784d171cf0..0000000000 Binary files a/fixture/23/1/1.3.3.3 and /dev/null differ diff --git a/fixture/23/2/.zarray b/fixture/23/2/.zarray deleted file mode 100644 index 06a917041e..0000000000 --- a/fixture/23/2/.zarray +++ /dev/null @@ -1,23 +0,0 @@ -{ - "chunks": [ - 10, - 3, - 3, - 3 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "ZoM_ b@B\K H*)mS,NQ \ No newline at end of file diff --git a/fixture/4/0/13 b/fixture/4/0/13 deleted file mode 100644 index f57b26d6c3..0000000000 --- a/fixture/4/0/13 +++ /dev/null @@ -1 +0,0 @@ -y.Q|5 rÔi9o -`QC [ \ No newline at end of file diff --git a/fixture/4/0/14 b/fixture/4/0/14 deleted file mode 100644 index f94817138a..0000000000 --- a/fixture/4/0/14 +++ /dev/null @@ -1 +0,0 @@ -%8l d2"|)]Mx4oxY`~%n>~:q1k:ŠY|ZIX9@9zC \ No newline at end of file diff --git a/fixture/4/0/18 b/fixture/4/0/18 deleted file mode 100644 index 4c620ed93e..0000000000 --- a/fixture/4/0/18 +++ /dev/null @@ -1 +0,0 @@ -U0y@  EM5t5\;~ djS94L;E @7kufTuTl; 1s]}Ni.gb;6.k#3! 
\ No newline at end of file diff --git a/fixture/4/0/19 b/fixture/4/0/19 deleted file mode 100644 index e92f942c4f..0000000000 Binary files a/fixture/4/0/19 and /dev/null differ diff --git a/fixture/4/0/2 b/fixture/4/0/2 deleted file mode 100644 index 883c1ade1c..0000000000 --- a/fixture/4/0/2 +++ /dev/null @@ -1,2 +0,0 @@ -"4;3 3 b+MY]4 -;S)[=np[=(~CL"a,8Pdg?B1.g]_LOl-#5;5i4T5 \ No newline at end of file diff --git a/fixture/4/0/20 b/fixture/4/0/20 deleted file mode 100644 index 6898070345..0000000000 --- a/fixture/4/0/20 +++ /dev/null @@ -1 +0,0 @@ -b=}u/¶XN.BtGe r4Fr8%DBo\jk7 KLk?![ (3UJmW$D . \ No newline at end of file diff --git a/fixture/4/0/21 b/fixture/4/0/21 deleted file mode 100644 index c51a1f4cd4..0000000000 Binary files a/fixture/4/0/21 and /dev/null differ diff --git a/fixture/4/0/22 b/fixture/4/0/22 deleted file mode 100644 index 766adbc56d..0000000000 Binary files a/fixture/4/0/22 and /dev/null differ diff --git a/fixture/4/0/3 b/fixture/4/0/3 deleted file mode 100644 index 346d3e3007..0000000000 Binary files a/fixture/4/0/3 and /dev/null differ diff --git a/fixture/4/0/4 b/fixture/4/0/4 deleted file mode 100644 index 1cffb0e029..0000000000 Binary files a/fixture/4/0/4 and /dev/null differ diff --git a/fixture/4/0/5 b/fixture/4/0/5 deleted file mode 100644 index e1cc79cb85..0000000000 Binary files a/fixture/4/0/5 and /dev/null differ diff --git a/fixture/4/0/6 b/fixture/4/0/6 deleted file mode 100644 index 333191f0cc..0000000000 Binary files a/fixture/4/0/6 and /dev/null differ diff --git a/fixture/4/0/7 b/fixture/4/0/7 deleted file mode 100644 index 670ef1274e..0000000000 Binary files a/fixture/4/0/7 and /dev/null differ diff --git a/fixture/4/0/8 b/fixture/4/0/8 deleted file mode 100644 index 12b9e4c69a..0000000000 Binary files a/fixture/4/0/8 and /dev/null differ diff --git a/fixture/4/0/9 b/fixture/4/0/9 deleted file mode 100644 index 7d5922cd9d..0000000000 --- a/fixture/4/0/9 +++ /dev/null @@ -1 +0,0 @@ - p9i5- Vv_UGVXdS=}B%4TL}rSfU=C·^z<1=bl% '^Dexsa \ No newline at end of file diff --git a/fixture/4/1/.zarray b/fixture/4/1/.zarray deleted file mode 100644 index 6a051baada..0000000000 --- a/fixture/4/1/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "zlib", - "level": 1 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/1/.zattrs b/fixture/4/1/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/1/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/4/1/0 b/fixture/4/1/0 deleted file mode 100644 index e9f97dbccf..0000000000 Binary files a/fixture/4/1/0 and /dev/null differ diff --git a/fixture/4/1/1 b/fixture/4/1/1 deleted file mode 100644 index f0f6c39dd9..0000000000 Binary files a/fixture/4/1/1 and /dev/null differ diff --git a/fixture/4/1/10 b/fixture/4/1/10 deleted file mode 100644 index 1f36297a0a..0000000000 Binary files a/fixture/4/1/10 and /dev/null differ diff --git a/fixture/4/1/11 b/fixture/4/1/11 deleted file mode 100644 index c86df746da..0000000000 Binary files a/fixture/4/1/11 and /dev/null differ diff --git a/fixture/4/1/12 b/fixture/4/1/12 deleted file mode 100644 index 6fa13f610d..0000000000 Binary files a/fixture/4/1/12 and /dev/null differ diff --git a/fixture/4/1/13 b/fixture/4/1/13 deleted file mode 100644 index 248ae49efb..0000000000 Binary files a/fixture/4/1/13 and /dev/null differ diff --git a/fixture/4/1/14 
b/fixture/4/1/14 deleted file mode 100644 index e81a6f9f17..0000000000 Binary files a/fixture/4/1/14 and /dev/null differ diff --git a/fixture/4/1/15 b/fixture/4/1/15 deleted file mode 100644 index d376431198..0000000000 Binary files a/fixture/4/1/15 and /dev/null differ diff --git a/fixture/4/1/16 b/fixture/4/1/16 deleted file mode 100644 index a677008367..0000000000 Binary files a/fixture/4/1/16 and /dev/null differ diff --git a/fixture/4/1/17 b/fixture/4/1/17 deleted file mode 100644 index ce8b23a238..0000000000 Binary files a/fixture/4/1/17 and /dev/null differ diff --git a/fixture/4/1/18 b/fixture/4/1/18 deleted file mode 100644 index a7f3d248d9..0000000000 Binary files a/fixture/4/1/18 and /dev/null differ diff --git a/fixture/4/1/19 b/fixture/4/1/19 deleted file mode 100644 index 7312311c52..0000000000 Binary files a/fixture/4/1/19 and /dev/null differ diff --git a/fixture/4/1/2 b/fixture/4/1/2 deleted file mode 100644 index b7920e4704..0000000000 Binary files a/fixture/4/1/2 and /dev/null differ diff --git a/fixture/4/1/20 b/fixture/4/1/20 deleted file mode 100644 index e4f8c115e3..0000000000 Binary files a/fixture/4/1/20 and /dev/null differ diff --git a/fixture/4/1/21 b/fixture/4/1/21 deleted file mode 100644 index 35225d4218..0000000000 Binary files a/fixture/4/1/21 and /dev/null differ diff --git a/fixture/4/1/22 b/fixture/4/1/22 deleted file mode 100644 index b8b5e078b7..0000000000 Binary files a/fixture/4/1/22 and /dev/null differ diff --git a/fixture/4/1/3 b/fixture/4/1/3 deleted file mode 100644 index 666dedbbf4..0000000000 Binary files a/fixture/4/1/3 and /dev/null differ diff --git a/fixture/4/1/4 b/fixture/4/1/4 deleted file mode 100644 index cfee6f71cc..0000000000 Binary files a/fixture/4/1/4 and /dev/null differ diff --git a/fixture/4/1/5 b/fixture/4/1/5 deleted file mode 100644 index d949a1f1d8..0000000000 Binary files a/fixture/4/1/5 and /dev/null differ diff --git a/fixture/4/1/6 b/fixture/4/1/6 deleted file mode 100644 index ff98a66fe2..0000000000 Binary files a/fixture/4/1/6 and /dev/null differ diff --git a/fixture/4/1/7 b/fixture/4/1/7 deleted file mode 100644 index 1890e57b24..0000000000 Binary files a/fixture/4/1/7 and /dev/null differ diff --git a/fixture/4/1/8 b/fixture/4/1/8 deleted file mode 100644 index 65040779fd..0000000000 Binary files a/fixture/4/1/8 and /dev/null differ diff --git a/fixture/4/1/9 b/fixture/4/1/9 deleted file mode 100644 index 4afeef2025..0000000000 Binary files a/fixture/4/1/9 and /dev/null differ diff --git a/fixture/4/2/.zarray b/fixture/4/2/.zarray deleted file mode 100644 index eb1c0d802a..0000000000 --- a/fixture/4/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/2/.zattrs b/fixture/4/2/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/2/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/4/2/0 b/fixture/4/2/0 deleted file mode 100644 index 6267e2504e..0000000000 Binary files a/fixture/4/2/0 and /dev/null differ diff --git a/fixture/4/2/1 b/fixture/4/2/1 deleted file mode 100644 index f8ae66dafa..0000000000 Binary files a/fixture/4/2/1 and /dev/null differ diff --git a/fixture/4/2/10 b/fixture/4/2/10 deleted file mode 100644 index f812d0bf37..0000000000 Binary files a/fixture/4/2/10 and /dev/null differ diff --git 
a/fixture/4/2/11 b/fixture/4/2/11 deleted file mode 100644 index 340e17990f..0000000000 Binary files a/fixture/4/2/11 and /dev/null differ diff --git a/fixture/4/2/12 b/fixture/4/2/12 deleted file mode 100644 index 28e7a19856..0000000000 Binary files a/fixture/4/2/12 and /dev/null differ diff --git a/fixture/4/2/13 b/fixture/4/2/13 deleted file mode 100644 index 410e633554..0000000000 Binary files a/fixture/4/2/13 and /dev/null differ diff --git a/fixture/4/2/14 b/fixture/4/2/14 deleted file mode 100644 index e50e9078b5..0000000000 Binary files a/fixture/4/2/14 and /dev/null differ diff --git a/fixture/4/2/15 b/fixture/4/2/15 deleted file mode 100644 index 522c940b87..0000000000 Binary files a/fixture/4/2/15 and /dev/null differ diff --git a/fixture/4/2/16 b/fixture/4/2/16 deleted file mode 100644 index cd18162164..0000000000 Binary files a/fixture/4/2/16 and /dev/null differ diff --git a/fixture/4/2/17 b/fixture/4/2/17 deleted file mode 100644 index c529a3217d..0000000000 Binary files a/fixture/4/2/17 and /dev/null differ diff --git a/fixture/4/2/18 b/fixture/4/2/18 deleted file mode 100644 index aecf6d89ed..0000000000 Binary files a/fixture/4/2/18 and /dev/null differ diff --git a/fixture/4/2/19 b/fixture/4/2/19 deleted file mode 100644 index 918edeb112..0000000000 Binary files a/fixture/4/2/19 and /dev/null differ diff --git a/fixture/4/2/2 b/fixture/4/2/2 deleted file mode 100644 index 5d1954653b..0000000000 Binary files a/fixture/4/2/2 and /dev/null differ diff --git a/fixture/4/2/20 b/fixture/4/2/20 deleted file mode 100644 index bc3cc5965e..0000000000 Binary files a/fixture/4/2/20 and /dev/null differ diff --git a/fixture/4/2/21 b/fixture/4/2/21 deleted file mode 100644 index 72e6bf012c..0000000000 Binary files a/fixture/4/2/21 and /dev/null differ diff --git a/fixture/4/2/22 b/fixture/4/2/22 deleted file mode 100644 index 317f843a59..0000000000 Binary files a/fixture/4/2/22 and /dev/null differ diff --git a/fixture/4/2/3 b/fixture/4/2/3 deleted file mode 100644 index 2900b3e95a..0000000000 Binary files a/fixture/4/2/3 and /dev/null differ diff --git a/fixture/4/2/4 b/fixture/4/2/4 deleted file mode 100644 index 1f799131df..0000000000 Binary files a/fixture/4/2/4 and /dev/null differ diff --git a/fixture/4/2/5 b/fixture/4/2/5 deleted file mode 100644 index 8810cf3345..0000000000 Binary files a/fixture/4/2/5 and /dev/null differ diff --git a/fixture/4/2/6 b/fixture/4/2/6 deleted file mode 100644 index f69c6e4cb6..0000000000 Binary files a/fixture/4/2/6 and /dev/null differ diff --git a/fixture/4/2/7 b/fixture/4/2/7 deleted file mode 100644 index 01083e08c7..0000000000 Binary files a/fixture/4/2/7 and /dev/null differ diff --git a/fixture/4/2/8 b/fixture/4/2/8 deleted file mode 100644 index 5ef12531ba..0000000000 Binary files a/fixture/4/2/8 and /dev/null differ diff --git a/fixture/4/2/9 b/fixture/4/2/9 deleted file mode 100644 index 227a54a717..0000000000 Binary files a/fixture/4/2/9 and /dev/null differ diff --git a/fixture/4/3/.zarray b/fixture/4/3/.zarray deleted file mode 100644 index cd1f621720..0000000000 --- a/fixture/4/3/.zarray +++ /dev/null @@ -1,19 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "clevel": 1, - "cname": "zstd", - "id": "blosc", - "shuffle": 0 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/3/.zattrs b/fixture/4/3/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/3/.zattrs +++ /dev/null @@ -1 +0,0 
@@ -{} \ No newline at end of file diff --git a/fixture/4/3/0 b/fixture/4/3/0 deleted file mode 100644 index f540c771ad..0000000000 Binary files a/fixture/4/3/0 and /dev/null differ diff --git a/fixture/4/3/1 b/fixture/4/3/1 deleted file mode 100644 index 79f1509b01..0000000000 Binary files a/fixture/4/3/1 and /dev/null differ diff --git a/fixture/4/3/10 b/fixture/4/3/10 deleted file mode 100644 index 367520fe2a..0000000000 Binary files a/fixture/4/3/10 and /dev/null differ diff --git a/fixture/4/3/11 b/fixture/4/3/11 deleted file mode 100644 index 512026eee9..0000000000 Binary files a/fixture/4/3/11 and /dev/null differ diff --git a/fixture/4/3/12 b/fixture/4/3/12 deleted file mode 100644 index 87ad9c1094..0000000000 Binary files a/fixture/4/3/12 and /dev/null differ diff --git a/fixture/4/3/13 b/fixture/4/3/13 deleted file mode 100644 index de77dc821c..0000000000 Binary files a/fixture/4/3/13 and /dev/null differ diff --git a/fixture/4/3/14 b/fixture/4/3/14 deleted file mode 100644 index 9d53c1a1c8..0000000000 Binary files a/fixture/4/3/14 and /dev/null differ diff --git a/fixture/4/3/15 b/fixture/4/3/15 deleted file mode 100644 index 1ab24bde60..0000000000 Binary files a/fixture/4/3/15 and /dev/null differ diff --git a/fixture/4/3/16 b/fixture/4/3/16 deleted file mode 100644 index a58272c3f6..0000000000 Binary files a/fixture/4/3/16 and /dev/null differ diff --git a/fixture/4/3/17 b/fixture/4/3/17 deleted file mode 100644 index 7826a65021..0000000000 Binary files a/fixture/4/3/17 and /dev/null differ diff --git a/fixture/4/3/18 b/fixture/4/3/18 deleted file mode 100644 index e1b745de02..0000000000 Binary files a/fixture/4/3/18 and /dev/null differ diff --git a/fixture/4/3/19 b/fixture/4/3/19 deleted file mode 100644 index b04e74d383..0000000000 Binary files a/fixture/4/3/19 and /dev/null differ diff --git a/fixture/4/3/2 b/fixture/4/3/2 deleted file mode 100644 index 9e2e6718e0..0000000000 Binary files a/fixture/4/3/2 and /dev/null differ diff --git a/fixture/4/3/20 b/fixture/4/3/20 deleted file mode 100644 index 426218fcea..0000000000 Binary files a/fixture/4/3/20 and /dev/null differ diff --git a/fixture/4/3/21 b/fixture/4/3/21 deleted file mode 100644 index 817a22bedc..0000000000 Binary files a/fixture/4/3/21 and /dev/null differ diff --git a/fixture/4/3/22 b/fixture/4/3/22 deleted file mode 100644 index 9b28ee38a0..0000000000 Binary files a/fixture/4/3/22 and /dev/null differ diff --git a/fixture/4/3/3 b/fixture/4/3/3 deleted file mode 100644 index e53f4402ba..0000000000 Binary files a/fixture/4/3/3 and /dev/null differ diff --git a/fixture/4/3/4 b/fixture/4/3/4 deleted file mode 100644 index 29149ad662..0000000000 Binary files a/fixture/4/3/4 and /dev/null differ diff --git a/fixture/4/3/5 b/fixture/4/3/5 deleted file mode 100644 index 3c7239e8f9..0000000000 Binary files a/fixture/4/3/5 and /dev/null differ diff --git a/fixture/4/3/6 b/fixture/4/3/6 deleted file mode 100644 index b02e381c2b..0000000000 Binary files a/fixture/4/3/6 and /dev/null differ diff --git a/fixture/4/3/7 b/fixture/4/3/7 deleted file mode 100644 index a8fc37a9df..0000000000 Binary files a/fixture/4/3/7 and /dev/null differ diff --git a/fixture/4/3/8 b/fixture/4/3/8 deleted file mode 100644 index a13db20a57..0000000000 Binary files a/fixture/4/3/8 and /dev/null differ diff --git a/fixture/4/3/9 b/fixture/4/3/9 deleted file mode 100644 index 4f2162de29..0000000000 Binary files a/fixture/4/3/9 and /dev/null differ diff --git a/fixture/4/4/.zarray b/fixture/4/4/.zarray deleted file mode 100644 index 
d89a6b2a47..0000000000 --- a/fixture/4/4/.zarray +++ /dev/null @@ -1,19 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "clevel": 1, - "cname": "zstd", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/4/.zattrs b/fixture/4/4/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/4/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/4/4/0 b/fixture/4/4/0 deleted file mode 100644 index 0cac20d7fc..0000000000 Binary files a/fixture/4/4/0 and /dev/null differ diff --git a/fixture/4/4/1 b/fixture/4/4/1 deleted file mode 100644 index a5f4ed368e..0000000000 Binary files a/fixture/4/4/1 and /dev/null differ diff --git a/fixture/4/4/10 b/fixture/4/4/10 deleted file mode 100644 index 1290670917..0000000000 Binary files a/fixture/4/4/10 and /dev/null differ diff --git a/fixture/4/4/11 b/fixture/4/4/11 deleted file mode 100644 index e2fea879dd..0000000000 Binary files a/fixture/4/4/11 and /dev/null differ diff --git a/fixture/4/4/12 b/fixture/4/4/12 deleted file mode 100644 index ab92dea000..0000000000 Binary files a/fixture/4/4/12 and /dev/null differ diff --git a/fixture/4/4/13 b/fixture/4/4/13 deleted file mode 100644 index a4ceee43eb..0000000000 Binary files a/fixture/4/4/13 and /dev/null differ diff --git a/fixture/4/4/14 b/fixture/4/4/14 deleted file mode 100644 index c75c32350d..0000000000 Binary files a/fixture/4/4/14 and /dev/null differ diff --git a/fixture/4/4/15 b/fixture/4/4/15 deleted file mode 100644 index 00f5a189c2..0000000000 Binary files a/fixture/4/4/15 and /dev/null differ diff --git a/fixture/4/4/16 b/fixture/4/4/16 deleted file mode 100644 index d9e34816ff..0000000000 Binary files a/fixture/4/4/16 and /dev/null differ diff --git a/fixture/4/4/17 b/fixture/4/4/17 deleted file mode 100644 index 8ed0d466f8..0000000000 Binary files a/fixture/4/4/17 and /dev/null differ diff --git a/fixture/4/4/18 b/fixture/4/4/18 deleted file mode 100644 index 94c28d9cb7..0000000000 Binary files a/fixture/4/4/18 and /dev/null differ diff --git a/fixture/4/4/19 b/fixture/4/4/19 deleted file mode 100644 index 3a772ffd25..0000000000 Binary files a/fixture/4/4/19 and /dev/null differ diff --git a/fixture/4/4/2 b/fixture/4/4/2 deleted file mode 100644 index f6280fe613..0000000000 Binary files a/fixture/4/4/2 and /dev/null differ diff --git a/fixture/4/4/20 b/fixture/4/4/20 deleted file mode 100644 index 5266e2804f..0000000000 Binary files a/fixture/4/4/20 and /dev/null differ diff --git a/fixture/4/4/21 b/fixture/4/4/21 deleted file mode 100644 index 1197e78ef2..0000000000 Binary files a/fixture/4/4/21 and /dev/null differ diff --git a/fixture/4/4/22 b/fixture/4/4/22 deleted file mode 100644 index 54cb6fb413..0000000000 Binary files a/fixture/4/4/22 and /dev/null differ diff --git a/fixture/4/4/3 b/fixture/4/4/3 deleted file mode 100644 index 67875d77bf..0000000000 Binary files a/fixture/4/4/3 and /dev/null differ diff --git a/fixture/4/4/4 b/fixture/4/4/4 deleted file mode 100644 index 79161bdd5a..0000000000 Binary files a/fixture/4/4/4 and /dev/null differ diff --git a/fixture/4/4/5 b/fixture/4/4/5 deleted file mode 100644 index ac780b8917..0000000000 Binary files a/fixture/4/4/5 and /dev/null differ diff --git a/fixture/4/4/6 b/fixture/4/4/6 deleted file mode 100644 index e1c7827214..0000000000 Binary files a/fixture/4/4/6 and /dev/null differ diff --git a/fixture/4/4/7 
b/fixture/4/4/7 deleted file mode 100644 index eea6cedd1d..0000000000 Binary files a/fixture/4/4/7 and /dev/null differ diff --git a/fixture/4/4/8 b/fixture/4/4/8 deleted file mode 100644 index 633dc42bcd..0000000000 Binary files a/fixture/4/4/8 and /dev/null differ diff --git a/fixture/4/4/9 b/fixture/4/4/9 deleted file mode 100644 index d788950673..0000000000 Binary files a/fixture/4/4/9 and /dev/null differ diff --git a/fixture/4/5/.zarray b/fixture/4/5/.zarray deleted file mode 100644 index 683afb9240..0000000000 --- a/fixture/4/5/.zarray +++ /dev/null @@ -1,19 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "clevel": 1, - "cname": "zstd", - "id": "blosc", - "shuffle": 2 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/5/.zattrs b/fixture/4/5/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/5/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/4/5/0 b/fixture/4/5/0 deleted file mode 100644 index 662a0586a0..0000000000 Binary files a/fixture/4/5/0 and /dev/null differ diff --git a/fixture/4/5/1 b/fixture/4/5/1 deleted file mode 100644 index b248541fb4..0000000000 Binary files a/fixture/4/5/1 and /dev/null differ diff --git a/fixture/4/5/10 b/fixture/4/5/10 deleted file mode 100644 index a8b0e6d582..0000000000 Binary files a/fixture/4/5/10 and /dev/null differ diff --git a/fixture/4/5/11 b/fixture/4/5/11 deleted file mode 100644 index 52abc642ab..0000000000 Binary files a/fixture/4/5/11 and /dev/null differ diff --git a/fixture/4/5/12 b/fixture/4/5/12 deleted file mode 100644 index ed36238f59..0000000000 Binary files a/fixture/4/5/12 and /dev/null differ diff --git a/fixture/4/5/13 b/fixture/4/5/13 deleted file mode 100644 index de4dad839c..0000000000 Binary files a/fixture/4/5/13 and /dev/null differ diff --git a/fixture/4/5/14 b/fixture/4/5/14 deleted file mode 100644 index e228940b9b..0000000000 Binary files a/fixture/4/5/14 and /dev/null differ diff --git a/fixture/4/5/15 b/fixture/4/5/15 deleted file mode 100644 index 4d6eaa3bc2..0000000000 Binary files a/fixture/4/5/15 and /dev/null differ diff --git a/fixture/4/5/16 b/fixture/4/5/16 deleted file mode 100644 index 6ccf089153..0000000000 Binary files a/fixture/4/5/16 and /dev/null differ diff --git a/fixture/4/5/17 b/fixture/4/5/17 deleted file mode 100644 index 4717299646..0000000000 Binary files a/fixture/4/5/17 and /dev/null differ diff --git a/fixture/4/5/18 b/fixture/4/5/18 deleted file mode 100644 index c06d67649d..0000000000 Binary files a/fixture/4/5/18 and /dev/null differ diff --git a/fixture/4/5/19 b/fixture/4/5/19 deleted file mode 100644 index 53cde70394..0000000000 Binary files a/fixture/4/5/19 and /dev/null differ diff --git a/fixture/4/5/2 b/fixture/4/5/2 deleted file mode 100644 index 6283cd6544..0000000000 Binary files a/fixture/4/5/2 and /dev/null differ diff --git a/fixture/4/5/20 b/fixture/4/5/20 deleted file mode 100644 index 9f0339c8a2..0000000000 Binary files a/fixture/4/5/20 and /dev/null differ diff --git a/fixture/4/5/21 b/fixture/4/5/21 deleted file mode 100644 index a5bd479d51..0000000000 Binary files a/fixture/4/5/21 and /dev/null differ diff --git a/fixture/4/5/22 b/fixture/4/5/22 deleted file mode 100644 index bf6d3f0788..0000000000 Binary files a/fixture/4/5/22 and /dev/null differ diff --git a/fixture/4/5/3 b/fixture/4/5/3 deleted file mode 100644 index 9e7a719eb0..0000000000 Binary files 
a/fixture/4/5/3 and /dev/null differ diff --git a/fixture/4/5/4 b/fixture/4/5/4 deleted file mode 100644 index 81dee86fe9..0000000000 Binary files a/fixture/4/5/4 and /dev/null differ diff --git a/fixture/4/5/5 b/fixture/4/5/5 deleted file mode 100644 index feb0e18f90..0000000000 Binary files a/fixture/4/5/5 and /dev/null differ diff --git a/fixture/4/5/6 b/fixture/4/5/6 deleted file mode 100644 index 08e2f1edcc..0000000000 Binary files a/fixture/4/5/6 and /dev/null differ diff --git a/fixture/4/5/7 b/fixture/4/5/7 deleted file mode 100644 index 2ac54b8369..0000000000 Binary files a/fixture/4/5/7 and /dev/null differ diff --git a/fixture/4/5/8 b/fixture/4/5/8 deleted file mode 100644 index af53e544f8..0000000000 Binary files a/fixture/4/5/8 and /dev/null differ diff --git a/fixture/4/5/9 b/fixture/4/5/9 deleted file mode 100644 index 6ef6b159b8..0000000000 Binary files a/fixture/4/5/9 and /dev/null differ diff --git a/fixture/4/6/.zarray b/fixture/4/6/.zarray deleted file mode 100644 index 8489e95909..0000000000 --- a/fixture/4/6/.zarray +++ /dev/null @@ -1,19 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "clevel": 1, - "cname": "lz4", - "id": "blosc", - "shuffle": 0 - }, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "F", - "shape": [ - 2222 - ], - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/4/6/.zattrs b/fixture/4/6/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/4/6/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/4/6/0 b/fixture/4/6/0 deleted file mode 100644 index 3a1d0df7a2..0000000000 Binary files a/fixture/4/6/0 and /dev/null differ diff --git a/fixture/4/6/1 b/fixture/4/6/1 deleted file mode 100644 index 790c8f8b15..0000000000 Binary files a/fixture/4/6/1 and /dev/null differ diff --git a/fixture/4/6/10 b/fixture/4/6/10 deleted file mode 100644 index 839dd2d2d0..0000000000 Binary files a/fixture/4/6/10 and /dev/null differ diff --git a/fixture/4/6/11 b/fixture/4/6/11 deleted file mode 100644 index 30b1abffc6..0000000000 Binary files a/fixture/4/6/11 and /dev/null differ diff --git a/fixture/4/6/12 b/fixture/4/6/12 deleted file mode 100644 index 70a9c95a8b..0000000000 Binary files a/fixture/4/6/12 and /dev/null differ diff --git a/fixture/4/6/13 b/fixture/4/6/13 deleted file mode 100644 index 1e0c2a7a9a..0000000000 Binary files a/fixture/4/6/13 and /dev/null differ diff --git a/fixture/4/6/14 b/fixture/4/6/14 deleted file mode 100644 index 00deb5e1cc..0000000000 Binary files a/fixture/4/6/14 and /dev/null differ diff --git a/fixture/4/6/15 b/fixture/4/6/15 deleted file mode 100644 index fad2921266..0000000000 Binary files a/fixture/4/6/15 and /dev/null differ diff --git a/fixture/4/6/16 b/fixture/4/6/16 deleted file mode 100644 index 7997cda633..0000000000 Binary files a/fixture/4/6/16 and /dev/null differ diff --git a/fixture/4/6/17 b/fixture/4/6/17 deleted file mode 100644 index b0fbdbeacb..0000000000 Binary files a/fixture/4/6/17 and /dev/null differ diff --git a/fixture/4/6/18 b/fixture/4/6/18 deleted file mode 100644 index f012ed4200..0000000000 Binary files a/fixture/4/6/18 and /dev/null differ diff --git a/fixture/4/6/19 b/fixture/4/6/19 deleted file mode 100644 index f9cb235bf3..0000000000 Binary files a/fixture/4/6/19 and /dev/null differ diff --git a/fixture/4/6/2 b/fixture/4/6/2 deleted file mode 100644 index 040c088e5c..0000000000 Binary files a/fixture/4/6/2 and /dev/null differ diff --git a/fixture/4/6/20 b/fixture/4/6/20 deleted file mode 100644 
index af3e2f1bba..0000000000 Binary files a/fixture/4/6/20 and /dev/null differ diff --git a/fixture/4/6/21 b/fixture/4/6/21 deleted file mode 100644 index f7a1749aec..0000000000 Binary files a/fixture/4/6/21 and /dev/null differ diff --git a/fixture/4/6/22 b/fixture/4/6/22 deleted file mode 100644 index 265130bbd9..0000000000 Binary files a/fixture/4/6/22 and /dev/null differ diff --git a/fixture/4/6/3 b/fixture/4/6/3 deleted file mode 100644 index 597b7c2787..0000000000 Binary files a/fixture/4/6/3 and /dev/null differ diff --git a/fixture/4/6/4 b/fixture/4/6/4 deleted file mode 100644 index 8500f6a21a..0000000000 Binary files a/fixture/4/6/4 and /dev/null differ diff --git a/fixture/4/6/5 b/fixture/4/6/5 deleted file mode 100644 index 3b747cce85..0000000000 Binary files a/fixture/4/6/5 and /dev/null differ diff --git a/fixture/4/6/6 b/fixture/4/6/6 deleted file mode 100644 index 51e8466415..0000000000 Binary files a/fixture/4/6/6 and /dev/null differ diff --git a/fixture/4/6/7 b/fixture/4/6/7 deleted file mode 100644 index 17752969b6..0000000000 Binary files a/fixture/4/6/7 and /dev/null differ diff --git a/fixture/4/6/8 b/fixture/4/6/8 deleted file mode 100644 index a8f7fd328a..0000000000 Binary files a/fixture/4/6/8 and /dev/null differ diff --git a/fixture/4/6/9 b/fixture/4/6/9 deleted file mode 100644 index 8bbde74ea2..0000000000 Binary files a/fixture/4/6/9 and /dev/null differ diff --git a/fixture/5/.zattrs b/fixture/5/.zattrs deleted file mode 100644 index 9e26dfeeb6..0000000000 --- a/fixture/5/.zattrs +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/fixture/5/.zgroup b/fixture/5/.zgroup deleted file mode 100644 index 3b7daf227c..0000000000 --- a/fixture/5/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format": 2 -} \ No newline at end of file diff --git a/fixture/5/0/.zarray b/fixture/5/0/.zarray deleted file mode 100644 index 8b3e48ffe7..0000000000 --- a/fixture/5/0/.zarray +++ /dev/null @@ -1,14 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": null, - "dtype": "o?^"~{OňN_D>'#^ -[uoyO^ү[3@9KSx_6= %\q7z5e/iūTeY f,[tƺlZɇtxO',׻7\Yο_=<;+܅?s4 \ No newline at end of file diff --git a/fixture/6/1/15 b/fixture/6/1/15 deleted file mode 100644 index d816848355..0000000000 Binary files a/fixture/6/1/15 and /dev/null differ diff --git a/fixture/6/1/16 b/fixture/6/1/16 deleted file mode 100644 index 69f3f7e6b8..0000000000 --- a/fixture/6/1/16 +++ /dev/null @@ -1,2 +0,0 @@ -x?(q_`E,7n`ԕA1[OpA d+$a2] )w~><;Q-z"t艘ވG.5nnNݔ5=6ln_IgBߚlX+Oڋj33sc -$رK5 Qܦj:?gžQ~ߜ:ꂷ|FΟ\]yl} ;e7|E"ege-vqx6 \ No newline at end of file diff --git a/fixture/6/1/17 b/fixture/6/1/17 deleted file mode 100644 index 5bb69635fd..0000000000 --- a/fixture/6/1/17 +++ /dev/null @@ -1,2 +0,0 @@ -xο+q:I70 bX -#7 ]6n%)dJoW|ߟO%Qi'ѭl1~ޣywr“ֈ):m7X|sg{[4_~Z3&9:sNkfW=5[s]I9wffZJاuQ7s[;OSu+yQasw½%sx?n&2 \ No newline at end of file diff --git a/fixture/6/1/18 b/fixture/6/1/18 deleted file mode 100644 index 2fa507b536..0000000000 --- a/fixture/6/1/18 +++ /dev/null @@ -1 +0,0 @@ -xб+qq-In_ @LAreA6,),])$餔Y~çy#b/%ߙ\|7לd#6f>b-w?_-区/Ig-J4%BmOx׽]NkuRFnh#1we߫#QxJvo{kmbu\e=I]γCSu+w~xW2@ \ No newline at end of file diff --git a/fixture/6/1/19 b/fixture/6/1/19 deleted file mode 100644 index 82b6ae9cd4..0000000000 --- a/fixture/6/1/19 +++ /dev/null @@ -1 +0,0 @@ -x?(q3J &p.`"R ddpu]ARIJl7<=?˹ψXŞxD͕z_5UhD;^7O:|n0;C{,=Œ~/d.WoЩ';3wvkKٜ9ʘ/Sosj?Y%sU^ncgy;)+8nbi\w{5Fnkv}՝~Q2 \ No newline at end of file diff --git a/fixture/6/1/2 b/fixture/6/1/2 deleted file mode 100644 index 563994f7ce..0000000000 --- a/fixture/6/1/2 +++ 
/dev/null @@ -1,2 +0,0 @@ -xο+qG -nQO7Q&,vErw)l-NAȮ 2=N2`?y 94z":P#}:mDL F˶L~J7=jU/ޛPm/~e~=+uڲItau!aC^Oۘнﺽ"KAK?_2 z(ԕ-FftSv/<ɗ= _Wl<;v:n35V \ No newline at end of file diff --git a/fixture/6/1/20 b/fixture/6/1/20 deleted file mode 100644 index e876fabebc..0000000000 --- a/fixture/6/1/20 +++ /dev/null @@ -1,3 +0,0 @@ -x%1(qa2 \,w WLW3rLr B]b0LhP[%gxz}_boc>"1q2"e:f^[_D"`݁Y=rSe~6yS{maoKLp_'s%|]3y+x -eHk -lA3e]czgz`Z"7" xKfn$g߮NWm1c \ No newline at end of file diff --git a/fixture/6/1/21 b/fixture/6/1/21 deleted file mode 100644 index 56f0e9e2a4..0000000000 --- a/fixture/6/1/21 +++ /dev/null @@ -1,3 +0,0 @@ -x;,PT,f J $*L4Ju&VQ 1HMt$$b|w89?2q4qEKD"&)w -kO*)˹65wC~xϼk'S+93e~ d7:e^>qS׶ŋxMӫl]1n/߸GRɿt{ -oKmG!iq2 \ No newline at end of file diff --git a/fixture/6/1/22 b/fixture/6/1/22 deleted file mode 100644 index 1f8433ac7b..0000000000 Binary files a/fixture/6/1/22 and /dev/null differ diff --git a/fixture/6/1/3 b/fixture/6/1/3 deleted file mode 100644 index 008050d082..0000000000 --- a/fixture/6/1/3 +++ /dev/null @@ -1 +0,0 @@ -x!Haaj2"nA-&,ʂ6a "eM0MFA"ð}]6xDL3('k#=EGt`Krn?9sm7| .ʼ<@~<G}4o7:e/ޱU\[oe%s5;<;>"#Zxn胇QaS17oCy j^+nTF{Kx?_k[#9}cO>|!w)7t]vNoQ7/Y[#8У)N׼3/COj=~1ʟ / \ No newline at end of file diff --git a/fixture/6/1/5 b/fixture/6/1/5 deleted file mode 100644 index e690d646fa..0000000000 Binary files a/fixture/6/1/5 and /dev/null differ diff --git a/fixture/6/1/6 b/fixture/6/1/6 deleted file mode 100644 index 06d044458c..0000000000 --- a/fixture/6/1/6 +++ /dev/null @@ -1 +0,0 @@ -x/HCqS'aiAm W`6( f  0=WDLMDH[D*cX_iђ\v4bJ[^CF7n*GxoQ۝ݮDz|M_OV{P@W{Սgwsb?SH&ߴ޷͹g6R3UݞMwdX{6/'mΝOJ/y \ No newline at end of file diff --git a/fixture/6/1/7 b/fixture/6/1/7 deleted file mode 100644 index a2546fc6f9..0000000000 Binary files a/fixture/6/1/7 and /dev/null differ diff --git a/fixture/6/1/8 b/fixture/6/1/8 deleted file mode 100644 index 8438ac02d1..0000000000 Binary files a/fixture/6/1/8 and /dev/null differ diff --git a/fixture/6/1/9 b/fixture/6/1/9 deleted file mode 100644 index 7a69fc9334..0000000000 --- a/fixture/6/1/9 +++ /dev/null @@ -1,2 +0,0 @@ -x?(q߷Kw Hd &`e w.)ztJI].V&2߾{~YOL_HW.FuS^_Go`MߤS!s1Fq+K~1@[&;ڷ-;^s {ҫAݜyZne3#^iīnWh?Z07 \ No newline at end of file diff --git a/fixture/6/2/.zarray b/fixture/6/2/.zarray deleted file mode 100644 index e251bae8a8..0000000000 --- a/fixture/6/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": " |+8+'c~wݡʿBOï`OJ0O-w`#m{~Ÿaƞu={=3c/7km{*|r܈6_N}\S䟴/y>gNa3z~ȼD͗޷1-{3 \ No newline at end of file diff --git a/fixture/7/1/10 b/fixture/7/1/10 deleted file mode 100644 index 3dec0e1b48..0000000000 Binary files a/fixture/7/1/10 and /dev/null differ diff --git a/fixture/7/1/11 b/fixture/7/1/11 deleted file mode 100644 index 4acad22bd2..0000000000 --- a/fixture/7/1/11 +++ /dev/null @@ -1 +0,0 @@ -x5ҿ+qs+]., SW,wbd`ێ?@IR̗RGLSﳼ|{β|0-t|:5_=Pxلg~~DϘn8Z z7`oCys_[yV^)>>öx]{OG/'sv*k/Wi"\kF^>y:g__߃ l-韗W/ \ No newline at end of file diff --git a/fixture/7/1/12 b/fixture/7/1/12 deleted file mode 100644 index 0ed495820d..0000000000 --- a/fixture/7/1/12 +++ /dev/null @@ -1,2 +0,0 @@ -x5/HCq?[0@ f""bj$K"cmED,a, -8,2u~|˹sM['CtKQ/di\=JR%a:we~Cn`4n̯䟗[5<Ӿ'ez˾9#c%^cS+ݓsB7JӉ1y3rGF gw#8nWl/U*S|x,ΚW/'?V^z>`GB5U \ No newline at end of file diff --git a/fixture/7/1/13 b/fixture/7/1/13 deleted file mode 100644 index 643f9ebe60..0000000000 --- a/fixture/7/1/13 +++ /dev/null @@ -1 +0,0 @@ -x5+q/۬@YNn&%(r!FN.jw?.NVJ,a9Hy\=_yG/0}> | s?!A~VEzaVMύy]O?|#E؍ݚ |>kkӖ3ɝ~/pj6}^2?72 \ No newline at end of file diff --git 
a/fixture/7/1/14 b/fixture/7/1/14 deleted file mode 100644 index 8910adf351..0000000000 Binary files a/fixture/7/1/14 and /dev/null differ diff --git a/fixture/7/1/15 b/fixture/7/1/15 deleted file mode 100644 index bb4bfaba31..0000000000 --- a/fixture/7/1/15 +++ /dev/null @@ -1 +0,0 @@ -x5ѯKCqoVDʬ D . g ȶ `eA ’M'(AWλsp\ #a>] "\X_6n` rI8}`צYW 95#ضr~{wbOޙy6%}V3ȯ+L{?ji}va.KN]S=UU}N{=_l~q|01 \ No newline at end of file diff --git a/fixture/7/1/16 b/fixture/7/1/16 deleted file mode 100644 index c093f63a14..0000000000 --- a/fixture/7/1/16 +++ /dev/null @@ -1,2 +0,0 @@ -x5;HapR7&4N$MQCRSP46E8Hh=ګ͆qOx p0_-&bYM 6\3@_hg -|?KzeXгʷeywO?'ʷ%Ws慨*;j>\'^˗I?n9ܻ}c0r/Sz23x7~/II~O3V \ No newline at end of file diff --git a/fixture/7/1/17 b/fixture/7/1/17 deleted file mode 100644 index ea4559a237..0000000000 --- a/fixture/7/1/17 +++ /dev/null @@ -1 +0,0 @@ -x5п+q\2[(-. R}78"+ Yd$L۟ bcA ߯OݦŸL`Os});Qe|Nһ1վyX\Q澊~o//{_:VW|r6?%?Ȑɛsׁ<3cmJn}EU+M9H,_]||U'ݩw$u">!}{ϵ 1 \ No newline at end of file diff --git a/fixture/7/1/18 b/fixture/7/1/18 deleted file mode 100644 index e55581b69a..0000000000 --- a/fixture/7/1/18 +++ /dev/null @@ -1,3 +0,0 @@ -x]?(q;wDJ2lPԱ(ut͟$c`]YlMdgydfP.ny>^-vLoaLr~|I -~]~~ʜq=݅{7noy3Gmٯg':pDOo/=|G;i 4$݆ -~ Et+r>'=*ݐw[-s=s]ώW=tsq!_q_ދ;0_0? \ No newline at end of file diff --git a/fixture/7/1/19 b/fixture/7/1/19 deleted file mode 100644 index 1a5759b0d8..0000000000 --- a/fixture/7/1/19 +++ /dev/null @@ -1 +0,0 @@ -x5!HCa=6^hrA Nb(YVua%diL6A%U2?sϽ+F/58\L^=fBm0M%o2 '6#{8K,6w+wMON߸]'ޱ!g^E㸼:>{(lMi/f |M|icWsuk]g]1jeψOqFH}y= b. \ No newline at end of file diff --git a/fixture/7/1/2 b/fixture/7/1/2 deleted file mode 100644 index 003395bec4..0000000000 --- a/fixture/7/1/2 +++ /dev/null @@ -1 +0,0 @@ -x5б+qs$%E&ŀlNF%ɩRG"nq)QPn+QR_ E[>|ߟ繮 :ɯe$U3w lx(y3oC.8f+SXC߃3[ \ No newline at end of file diff --git a/fixture/7/1/21 b/fixture/7/1/21 deleted file mode 100644 index 212e3514d2..0000000000 --- a/fixture/7/1/21 +++ /dev/null @@ -1 +0,0 @@ -x5ЯKCaPa it0aa,65- ‚m^APll-_ssx"mӁ }&+d~UEXhϚX}S ߗ;5 7MwW_҇rԯuz(K|UWiaDϨ8$? |5 6yy|O[m✹sOO}۞)s^Ww}=6ms|Y/& \ No newline at end of file diff --git a/fixture/7/1/22 b/fixture/7/1/22 deleted file mode 100644 index e05e70333d..0000000000 Binary files a/fixture/7/1/22 and /dev/null differ diff --git a/fixture/7/1/3 b/fixture/7/1/3 deleted file mode 100644 index e098767a58..0000000000 --- a/fixture/7/1/3 +++ /dev/null @@ -1 +0,0 @@ -x5?(q;E1EE)ȟLtn+FEnbN)aL7\XXlӥ|_[yy(\tDm5F2Lcs&! _?Կk};<,%ݱBW wʜ^=gtӃ|]|9oqy]{ cew\-<wדo9ȗt Xtߩ5j~i?K{}O]Cd_}sMw"ܔ0* \ No newline at end of file diff --git a/fixture/7/1/4 b/fixture/7/1/4 deleted file mode 100644 index 04ea1fa394..0000000000 --- a/fixture/7/1/4 +++ /dev/null @@ -1,2 +0,0 @@ -x5?KQ?oI-M"Q AQKmVb4SC*AAkC:aM } tyssν"\v4  -w5~v"9ّo_cy|Oo9^溽g5XߛU{Jz'|Is ~s 5=&7#o^ᬽs"=wG[|wK |3r3r&2ߑvo _'}}$!g?{~U- \ No newline at end of file diff --git a/fixture/7/1/5 b/fixture/7/1/5 deleted file mode 100644 index fac8730e26..0000000000 --- a/fixture/7/1/5 +++ /dev/null @@ -1,6 +0,0 @@ -x]-Has:dU,d&6-0&b4(hYšC8,6 - -C@D1dL0i|~ɷ{'D`5/C{ -??7|I -},AO5[/kwO^O}ѯϩ;|ܧ{ -MmE/Ex-w@wuY)l`}[re]sz}i{߭{V1c/kaz39Kcr2|r~S;Gꊹ~s]_))2 \ No newline at end of file diff --git a/fixture/7/1/6 b/fixture/7/1/6 deleted file mode 100644 index e1d4851359..0000000000 --- a/fixture/7/1/6 +++ /dev/null @@ -1,2 +0,0 @@ -x51/CqG` 1b!D:H Md$A(Q4ED"i :,&@={vDO3e[k況F]eC.aO-%aüaOł0Oe=2)=;~J^$W^^C7/*6[pc/ݛ#ͷ+wOo}kc[uNO{!  
-1V \ No newline at end of file diff --git a/fixture/7/1/7 b/fixture/7/1/7 deleted file mode 100644 index 265611e887..0000000000 --- a/fixture/7/1/7 +++ /dev/null @@ -1 +0,0 @@ -x5б+qqeBd`EJpF:70tGRn[,YQR"E s>>~8e8 | rq~s[ΖwզgZ}[gkїWO ʯ;3'n=8dD)LS1] Jw k n^?WW9Ox]쀋|rg[ lCg 'ū̙c &ڽc1 \ No newline at end of file diff --git a/fixture/7/1/8 b/fixture/7/1/8 deleted file mode 100644 index dc459ca766..0000000000 --- a/fixture/7/1/8 +++ /dev/null @@ -1 +0,0 @@ -x51(qȠn0u2Ā1tI7\r%ҭtR rX.}}v|2'xkC|pn=vh/w?_Yz|k._%xD{`KnSn|nG.ɩU0no;rw??'wf?_QqOs)g=ߕ1{}.k;ْ3e9p#%6q \ No newline at end of file diff --git a/fixture/7/1/9 b/fixture/7/1/9 deleted file mode 100644 index 2d92fded36..0000000000 --- a/fixture/7/1/9 +++ /dev/null @@ -1,2 +0,0 @@ -x5+KaM@:0LeILd -F`d]W< jh a&o}&H-zf(S Nq aEܤ9LOL0zgO}m~|_ߐeYtaǞ:M?ͻs;Ưo;r>|m®ww'-ǁEwwOz^2W[#w'+iN-- \ No newline at end of file diff --git a/fixture/7/2/.zarray b/fixture/7/2/.zarray deleted file mode 100644 index d1f1669345..0000000000 --- a/fixture/7/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "6?6A6B6C6D6E6G6H6I6J6L6M6N6O6Q6R6S6T6U6W6X6Y6Z6\6]6^6_6`6b6c6d6e6g6h6i6j6l6m6n6o6p6r6s6t6u6w6x6y6z6|6}6~666666666666666666666666666666666666666666666666 \ No newline at end of file diff --git a/fixture/8/0/14 b/fixture/8/0/14 deleted file mode 100644 index e5354ceffc..0000000000 Binary files a/fixture/8/0/14 and /dev/null differ diff --git a/fixture/8/0/15 b/fixture/8/0/15 deleted file mode 100644 index 1f210be817..0000000000 --- a/fixture/8/0/15 +++ /dev/null @@ -1 +0,0 @@ -4757678797:7;7=7>7?7@7A7C7D7E7F7H7I7J7K7M7N7O7P7Q7S7T7U7V7X7Y7Z7[7]7^7_7`7a7c7d7e7f7h7i7j7k7l7n7o7p7q7s7t7u7v7x7y7z7{7|7~7777777777777777777777777777777777777777 \ No newline at end of file diff --git a/fixture/8/0/16 b/fixture/8/0/16 deleted file mode 100644 index 8234a8d60f..0000000000 Binary files a/fixture/8/0/16 and /dev/null differ diff --git a/fixture/8/0/17 b/fixture/8/0/17 deleted file mode 100644 index 1a679108a0..0000000000 --- a/fixture/8/0/17 +++ /dev/null @@ -1 +0,0 @@ -888888888888888888 8!8!8"8"8#8$8$8%8%8&8'8'8(8)8)8*8*8+8,8,8-8-8.8/8/808181828283848485858687878889898:8:8;8<8<8=8=8>8?8?8@8A8A8B8B8C8D8D8E8E8F8G8G8H8I8I8J8J8K8L8L8M8M8N8O8O8P8Q8Q8R8 \ No newline at end of file diff --git a/fixture/8/0/18 b/fixture/8/0/18 deleted file mode 100644 index 3a621a92df..0000000000 --- a/fixture/8/0/18 +++ /dev/null @@ -1 +0,0 @@ -R8S8T8T8U8U8V8W8W8X8Y8Y8Z8Z8[8\8\8]8]8^8_8_8`8`8a8b8b8c8d8d8e8e8f8g8g8h8h8i8j8j8k8l8l8m8m8n8o8o8p8p8q8r8r8s8t8t8u8u8v8w8w8x8x8y8z8z8{8|8|8}8}8~88888888888888888888888888888 \ No newline at end of file diff --git a/fixture/8/0/19 b/fixture/8/0/19 deleted file mode 100644 index bec158bd6f..0000000000 --- a/fixture/8/0/19 +++ /dev/null @@ -1 +0,0 @@ -8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 \ No newline at end of file diff --git a/fixture/8/0/2 b/fixture/8/0/2 deleted file mode 100644 index abbbc5a38b..0000000000 --- a/fixture/8/0/2 +++ /dev/null @@ -1 +0,0 @@ -+++++++++, ,,,,,!,&,+,0,5,:,?,D,I,M,R,W,\,a,f,k,p,u,z,,,,,,,,,,,,,,,,,,,,,,,,,,,,-- ----!-&-+-0-5-9->-C-H-M-R-W-\-a-f-k-p-t-y-~-------------- \ No newline at end of file diff --git a/fixture/8/0/20 b/fixture/8/0/20 deleted file mode 100644 index 40fefc09d4..0000000000 Binary files a/fixture/8/0/20 and /dev/null differ diff --git a/fixture/8/0/21 b/fixture/8/0/21 deleted file mode 100644 index 6b33e50e5f..0000000000 --- a/fixture/8/0/21 +++ /dev/null @@ -1 +0,0 @@ - 9 9 9 9 
[binary fixture data omitted: this stretch of the diff deletes the remaining test-fixture chunk files and .zarray metadata under fixture/8/ and fixture/9/ (each entry reads "deleted file mode 100644"); the deleted payloads are raw compressed bytes and do not render as text]
-< \ No newline at end of file diff --git a/fixture/9/1/5 b/fixture/9/1/5 deleted file mode 100644 index db4aaad720..0000000000 Binary files a/fixture/9/1/5 and /dev/null differ diff --git a/fixture/9/1/6 b/fixture/9/1/6 deleted file mode 100644 index dc9705c136..0000000000 Binary files a/fixture/9/1/6 and /dev/null differ diff --git a/fixture/9/1/7 b/fixture/9/1/7 deleted file mode 100644 index 0ecddaf5b3..0000000000 Binary files a/fixture/9/1/7 and /dev/null differ diff --git a/fixture/9/1/8 b/fixture/9/1/8 deleted file mode 100644 index 85231fc36b..0000000000 Binary files a/fixture/9/1/8 and /dev/null differ diff --git a/fixture/9/1/9 b/fixture/9/1/9 deleted file mode 100644 index aeb17db367..0000000000 Binary files a/fixture/9/1/9 and /dev/null differ diff --git a/fixture/9/2/.zarray b/fixture/9/2/.zarray deleted file mode 100644 index be1b7e3f28..0000000000 --- a/fixture/9/2/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks": [ - 100 - ], - "compressor": { - "id": "bz2", - "level": 1 - }, - "dtype": "=64.0.0", "setuptools-scm>1.5.4"] -build-backend = "setuptools.build_meta" +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" [project] name = "zarr" description = "An implementation of chunked, compressed, N-dimensional arrays for Python" readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "Alistair Miles", email = "alimanfoo@googlemail.com" }, +] maintainers = [ - { name = "Alistair Miles", email = "alimanfoo@googlemail.com" } + { name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }, + { name = "jakirkham" }, + { name = "Josh Moore", email = "josh@openmicroscopy.org" }, + { name = "Joe Hamman", email = "joe@earthmover.io" }, + { name = "Juan Nunez-Iglesias", email = "juan.nunez-iglesias@monash.edu" }, + { name = "Martin Durant", email = "mdurant@anaconda.com" }, + { name = "Norman Rzepka" }, + { name = "Ryan Abernathey" } ] -requires-python = ">=3.10" +requires-python = ">=3.11" +# If you add a new dependency here, please also add it to .pre-commit-config.yml dependencies = [ 'asciitree', - 'numpy>=1.24', - 'fasteners; sys_platform != "emscripten"', - 'numcodecs>=0.10.0', + 'numpy>=1.25', + 'fasteners', + 'numcodecs>=0.10.2', + 'fsspec>2024', + 'crc32c', + 'typing_extensions', + 'donfig', ] dynamic = [ "version", @@ -30,27 +45,57 @@ classifiers = [ 'Topic :: Software Development :: Libraries :: Python Modules', 'Operating System :: Unix', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', ] -license = { text = "MIT" } +license = {text = "MIT License"} +keywords = ["Python", "compressed", "ndimensional-arrays", "zarr"] [project.optional-dependencies] +test = [ + "coverage", + "pytest", + "pytest-cov", + "msgpack", + "lmdb", + "s3fs", + "pytest-asyncio", + "moto[s3]", + "flask-cors", + "flask", + "requests", + "mypy", + "hypothesis", + "universal-pathlib", +] + jupyter = [ 'notebook', 'ipytree>=0.2.2', 'ipywidgets>=8.0.0', ] +gpu = [ + "cupy-cuda12x", +] docs = [ - 'sphinx', - 'sphinx-automodapi', + 'sphinx==8.1.3', + 'sphinx-autobuild>=2021.3.14', + 'sphinx-autoapi==3.3.2', 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', 'pydata-sphinx-theme', 'numpydoc', 'numcodecs[msgpack]', + 'msgpack', + 'lmdb', +] +extra = [ + 'msgpack', +] +optional = [ + 'lmdb', + 'universal-pathlib', ] [project.urls] @@ -69,22 +114,76 @@ exclude_lines = [ [tool.coverage.run] omit = [ - "zarr/meta_v1.py", + 
"src/zarr/meta_v1.py", "bench/compress_normal.py", ] -[tool.setuptools] -packages = ["zarr", "zarr._storage", "zarr.tests"] -license-files = ["LICENSE.txt"] +[tool.hatch] +version.source = "vcs" +build.hooks.vcs.version-file = "src/zarr/_version.py" -[tool.setuptools_scm] -version_scheme = "guess-next-dev" -local_scheme = "dirty-tag" -write_to = "zarr/version.py" +[tool.hatch.envs.test] +dependencies = [ + "numpy~={matrix:numpy}", + "universal_pathlib", +] +features = ["test", "extra"] + +[[tool.hatch.envs.test.matrix]] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] +version = ["minimal"] + +[[tool.hatch.envs.test.matrix]] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] +features = ["optional"] + +[[tool.hatch.envs.test.matrix]] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] +features = ["gpu"] + +[tool.hatch.envs.test.scripts] +run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests" +run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests" +run = "run-coverage --no-cov" +run-verbose = "run-coverage --verbose" +run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*" +list-env = "pip list" + +[tool.hatch.envs.gputest] +dependencies = [ + "numpy~={matrix:numpy}", + "universal_pathlib", +] +features = ["test", "extra", "gpu"] + +[[tool.hatch.envs.gputest.matrix]] +python = ["3.11", "3.12"] +numpy = ["1.25", "1.26", "2.0"] +version = ["minimal"] + +[tool.hatch.envs.gputest.scripts] +run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests" +run = "run-coverage --no-cov" +run-verbose = "run-coverage --verbose" +run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*" +list-env = "pip list" + +[tool.hatch.envs.docs] +features = ['docs'] + +[tool.hatch.envs.docs.scripts] +build = "cd docs && make html" +serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" [tool.ruff] line-length = 100 -exclude = [ +force-exclude = true +extend-exclude = [ ".bzr", ".direnv", ".eggs", @@ -99,59 +198,129 @@ exclude = [ "buck-out", "build", "dist", + "notebooks", # temporary, until we achieve compatibility with ruff ≥ 0.6 "venv", - "docs" + "docs", + "src/zarr/v2/", + "tests/v2/", ] [tool.ruff.lint] extend-select = [ - "B" + "ANN", # flake8-annotations + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "FLY", # flynt + "I", # isort + "ISC", # flake8-implicit-str-concat + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RSE", # flake8-raise + "RET", # flake8-return + "RUF", + "TCH", # flake8-type-checking + "TRY", # tryceratops + "UP", # pyupgrade +] +ignore = [ + "ANN003", + "ANN101", + "ANN102", + "ANN401", + "PT004", # deprecated + "PT011", # TODO: apply this rule + "PT012", # TODO: apply this rule + "PYI013", + "RET505", + "RET506", + "RUF005", + "TRY003", + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "W191", + "E111", + "E114", + "E117", + "D206", + "D300", + "Q000", + "Q001", + "Q002", + "Q003", + "COM812", + "COM819", + "ISC001", + "ISC002", ] -ignore = ["B905"] # zip-without-explicit-strict -[tool.black] -line-length = 100 -exclude = ''' -/( - \.git - | \.mypy_cache - | \.venv - | _build - | buck-out - | build - | dist - | docs -)/ -''' +[tool.ruff.lint.extend-per-file-ignores] +"tests/**" = ["ANN001", "ANN201"] [tool.mypy] +python_version = "3.11" 
ignore_missing_imports = true -warn_unused_configs = true -warn_redundant_casts = true -warn_unused_ignores = true +namespace_packages = false + + +strict = true +warn_unreachable = true + +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] + +[[tool.mypy.overrides]] +module = [ + "zarr.v2.*", +] +ignore_errors = true + +[[tool.mypy.overrides]] +module = [ + "tests.v2.*", + "tests.v3.package_with_entrypoint.*", + "tests.v3.test_codecs.test_codecs", + "tests.v3.test_codecs.test_transpose", + "tests.v3.test_metadata.*", + "tests.v3.test_store.*", + "tests.v3.test_config", + "tests.v3.test_group", + "tests.v3.test_indexing", + "tests.v3.test_properties", + "tests.v3.test_sync", + "tests.v3.test_v2", +] +ignore_errors = true [tool.pytest.ini_options] +minversion = "7" +testpaths = ["tests"] +log_cli_level = "INFO" +xfail_strict = true +asyncio_mode = "auto" doctest_optionflags = [ "NORMALIZE_WHITESPACE", "ELLIPSIS", "IGNORE_EXCEPTION_DETAIL", ] addopts = [ - "--durations=10", + "--durations=10", "-ra", "--strict-config", "--strict-markers", ] filterwarnings = [ "error:::zarr.*", "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", - "ignore:The .* is deprecated and will be removed in a Zarr-Python version 3*:FutureWarning", - "ignore:The experimental Zarr V3 implementation in this version .*:FutureWarning", + "ignore:Creating a zarr.buffer.gpu.*:UserWarning", + "ignore:Duplicate name:UserWarning", # from ZipFile ] -doctest_subpackage_requires =[ - "zarr/core.py = numpy>=2", - "zarr/creation.py = numpy>=2" +markers = [ + "gpu: mark a test as requiring CuPy and GPU" ] +[tool.repo-review] +ignore = [ + "PC111", # fix Python code in documentation - enable later + "PC180", # for JavaScript - not interested +] -[tool.codespell] -ignore-words-list = "ba,ihs,kake,nd,noe,nwo,te,fo,zar" -skip = 'fixture,.git' +[tool.numpydoc_validation] +# See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks +checks = ["GL06", "GL07", "GL10", "PR03", "PR05", "PR06"] diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt deleted file mode 100644 index caa078cc82..0000000000 --- a/requirements_dev_minimal.txt +++ /dev/null @@ -1,8 +0,0 @@ -# library requirements -asciitree==0.3.3 -fasteners==0.19 -numcodecs==0.13.1 -msgpack-python==0.5.6 -setuptools-scm==8.1.0 -# test requirements -pytest==8.3.3 diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt deleted file mode 100644 index 4a619aa3ef..0000000000 --- a/requirements_dev_numpy.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Break this out into a separate file to allow testing against -# different versions of numpy. This file should pin to the latest -# numpy version. 
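The [tool.pytest.ini_options] block above registers a custom "gpu" marker, which the gputest scripts select with pytest -m gpu. A minimal sketch of a test opting into that marker (the test name and CuPy calls are illustrative only, not part of this change):

import pytest

@pytest.mark.gpu
def test_roundtrip_requires_cupy() -> None:
    cupy = pytest.importorskip("cupy")  # skip cleanly when CuPy is not installed
    assert cupy.asnumpy(cupy.arange(3)).tolist() == [0, 1, 2]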
-numpy==2.1.2 diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt deleted file mode 100644 index df1d4fd793..0000000000 --- a/requirements_dev_optional.txt +++ /dev/null @@ -1,23 +0,0 @@ -# optional library requirements -# bsddb3==6.2.6; sys_platform != 'win32' -lmdb==1.5.1; sys_platform != 'win32' -# optional library requirements for Jupyter -ipytree==0.2.2 -ipywidgets==8.1.5 -# optional library requirements for services -# don't let pyup change pinning for azure-storage-blob, need to pin to older -# version to get compatibility with azure storage emulator on appveyor (FIXME) -azure-storage-blob==12.21.0 # pyup: ignore -redis==5.1.1 -types-redis -types-setuptools -pymongo==4.10.1 -# optional test requirements -coverage -pytest-cov==5.0.0 -pytest-doctestplus==1.2.1 -pytest-timeout==2.3.1 -h5py==3.12.1 -fsspec==2023.12.2 -s3fs==2023.12.2 -moto[server]>=5.0.1 diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py new file mode 100644 index 0000000000..51116a929e --- /dev/null +++ b/src/zarr/__init__.py @@ -0,0 +1,68 @@ +from zarr._version import version as __version__ +from zarr.api.synchronous import ( + array, + consolidate_metadata, + copy, + copy_all, + copy_store, + create, + empty, + empty_like, + full, + full_like, + group, + load, + ones, + ones_like, + open, + open_array, + open_consolidated, + open_group, + open_like, + save, + save_array, + save_group, + tree, + zeros, + zeros_like, +) +from zarr.core.array import Array, AsyncArray +from zarr.core.config import config +from zarr.core.group import AsyncGroup, Group + +# in case setuptools scm screw up and find version to be 0.0.0 +assert not __version__.startswith("0.0.0") + +__all__ = [ + "Array", + "AsyncArray", + "AsyncGroup", + "Group", + "__version__", + "array", + "config", + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "create", + "empty", + "empty_like", + "full", + "full_like", + "group", + "load", + "ones", + "ones_like", + "open", + "open_array", + "open_consolidated", + "open_group", + "open_like", + "save", + "save_array", + "save_group", + "tree", + "zeros", + "zeros_like", +] diff --git a/src/zarr/_compat.py b/src/zarr/_compat.py new file mode 100644 index 0000000000..52d96005cc --- /dev/null +++ b/src/zarr/_compat.py @@ -0,0 +1,68 @@ +import warnings +from collections.abc import Callable +from functools import wraps +from inspect import Parameter, signature +from typing import Any, TypeVar + +T = TypeVar("T") + +# Based off https://github.com/scikit-learn/scikit-learn/blob/e87b32a81c70abed8f2e97483758eb64df8255e9/sklearn/utils/validation.py#L63 + + +def _deprecate_positional_args( + func: Callable[..., T] | None = None, *, version: str = "3.1.0" +) -> Callable[..., T]: + """Decorator for methods that issues warnings for positional arguments. + + Using the keyword-only argument syntax in pep 3102, arguments after the + * will issue a warning when passed as a positional argument. + + Parameters + ---------- + func : callable, default=None + Function to check arguments on. + version : callable, default="3.1.0" + The version when positional arguments will result in error. 
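A rough usage sketch for the _deprecate_positional_args decorator defined above; resize and its arguments are hypothetical and only exercise the warning path:

import numpy as np
from zarr._compat import _deprecate_positional_args

@_deprecate_positional_args
def resize(arr, *, shape):
    return shape

resize(np.zeros(4), (10, 10))
# FutureWarning: Pass shape=(10, 10) as keyword args. From version 3.1.0
# passing these as positional arguments will result in an error
resize(np.zeros(4), shape=(10, 10))  # no warning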
+ """ + + def _inner_deprecate_positional_args(f: Callable[..., T]) -> Callable[..., T]: + sig = signature(f) + kwonly_args = [] + all_args = [] + + for name, param in sig.parameters.items(): + if param.kind == Parameter.POSITIONAL_OR_KEYWORD: + all_args.append(name) + elif param.kind == Parameter.KEYWORD_ONLY: + kwonly_args.append(name) + + @wraps(f) + def inner_f(*args: Any, **kwargs: Any) -> T: + extra_args = len(args) - len(all_args) + if extra_args <= 0: + return f(*args, **kwargs) + + # extra_args > 0 + args_msg = [ + f"{name}={arg}" + for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:], strict=False) + ] + formatted_args_msg = ", ".join(args_msg) + warnings.warn( + ( + f"Pass {formatted_args_msg} as keyword args. From version " + f"{version} passing these as positional arguments " + "will result in an error" + ), + FutureWarning, + stacklevel=2, + ) + kwargs.update(zip(sig.parameters, args, strict=False)) + return f(**kwargs) + + return inner_f + + if func is not None: + return _inner_deprecate_positional_args(func) + + return _inner_deprecate_positional_args # type: ignore[return-value] diff --git a/zarr/_storage/__init__.py b/src/zarr/abc/__init__.py similarity index 100% rename from zarr/_storage/__init__.py rename to src/zarr/abc/__init__.py diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py new file mode 100644 index 0000000000..73b1a598b9 --- /dev/null +++ b/src/zarr/abc/codec.py @@ -0,0 +1,429 @@ +from __future__ import annotations + +from abc import abstractmethod +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +from zarr.abc.metadata import Metadata +from zarr.core.buffer import Buffer, NDBuffer +from zarr.core.common import ChunkCoords, concurrent_map +from zarr.core.config import config + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Iterable + from typing import Self + + import numpy as np + + from zarr.abc.store import ByteGetter, ByteSetter + from zarr.core.array_spec import ArraySpec + from zarr.core.chunk_grids import ChunkGrid + from zarr.core.indexing import SelectorTuple + +__all__ = [ + "ArrayArrayCodec", + "ArrayBytesCodec", + "ArrayBytesCodecPartialDecodeMixin", + "ArrayBytesCodecPartialEncodeMixin", + "BaseCodec", + "BytesBytesCodec", + "CodecInput", + "CodecOutput", + "CodecPipeline", +] + +CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer) +CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer) + + +class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]): + """Generic base class for codecs. + + Codecs can be registered via zarr.codecs.registry. + + Warnings + -------- + This class is not intended to be directly, please use + ArrayArrayCodec, ArrayBytesCodec or BytesBytesCodec for subclassing. + """ + + is_fixed_size: bool + + @abstractmethod + def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: + """Given an input byte length, this method returns the output byte length. + Raises a NotImplementedError for codecs with variable-sized outputs (e.g. compressors). + + Parameters + ---------- + input_byte_length : int + chunk_spec : ArraySpec + + Returns + ------- + int + """ + ... + + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: + """Computed the spec of the chunk after it has been encoded by the codec. + This is important for codecs that change the shape, data type or fill value of a chunk. + The spec will then be used for subsequent codecs in the pipeline. 
+ + Parameters + ---------- + chunk_spec : ArraySpec + + Returns + ------- + ArraySpec + """ + return chunk_spec + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + """Fills in codec configuration parameters that can be automatically + inferred from the array metadata. + + Parameters + ---------- + chunk_spec : ArraySpec + + Returns + ------- + Self + """ + return self + + def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + """Validates that the codec configuration is compatible with the array metadata. + Raises errors when the codec configuration is not compatible. + + Parameters + ---------- + shape: ChunkCoords + The array shape + dtype: np.dtype[Any] + The array data type + chunk_grid: ChunkGrid + The array chunk grid + """ + ... + + async def _decode_single(self, chunk_data: CodecOutput, chunk_spec: ArraySpec) -> CodecInput: + raise NotImplementedError + + async def decode( + self, + chunks_and_specs: Iterable[tuple[CodecOutput | None, ArraySpec]], + ) -> Iterable[CodecInput | None]: + """Decodes a batch of chunks. + Chunks can be None in which case they are ignored by the codec. + + Parameters + ---------- + chunks_and_specs : Iterable[tuple[CodecOutput | None, ArraySpec]] + Ordered set of encoded chunks with their accompanying chunk spec. + + Returns + ------- + Iterable[CodecInput | None] + """ + return await _batching_helper(self._decode_single, chunks_and_specs) + + async def _encode_single( + self, chunk_data: CodecInput, chunk_spec: ArraySpec + ) -> CodecOutput | None: + raise NotImplementedError + + async def encode( + self, + chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], + ) -> Iterable[CodecOutput | None]: + """Encodes a batch of chunks. + Chunks can be None in which case they are ignored by the codec. + + Parameters + ---------- + chunks_and_specs : Iterable[tuple[CodecInput | None, ArraySpec]] + Ordered set of to-be-encoded chunks with their accompanying chunk spec. + + Returns + ------- + Iterable[CodecOutput | None] + """ + return await _batching_helper(self._encode_single, chunks_and_specs) + + +class ArrayArrayCodec(BaseCodec[NDBuffer, NDBuffer]): + """Base class for array-to-array codecs.""" + + ... + + +class ArrayBytesCodec(BaseCodec[NDBuffer, Buffer]): + """Base class for array-to-bytes codecs.""" + + ... + + +class BytesBytesCodec(BaseCodec[Buffer, Buffer]): + """Base class for bytes-to-bytes codecs.""" + + ... + + +Codec = ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec + + +class ArrayBytesCodecPartialDecodeMixin: + """Mixin for array-to-bytes codecs that implement partial decoding.""" + + async def _decode_partial_single( + self, byte_getter: ByteGetter, selection: SelectorTuple, chunk_spec: ArraySpec + ) -> NDBuffer | None: + raise NotImplementedError + + async def decode_partial( + self, + batch_info: Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + """Partially decodes a batch of chunks. + This method determines parts of a chunk from the slice selection, + fetches these parts from the store (via ByteGetter) and decodes them. + + Parameters + ---------- + batch_info : Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]] + Ordered set of information about slices of encoded chunks. + The slice selection determines which parts of the chunk will be fetched. + The ByteGetter is used to fetch the necessary bytes. + The chunk spec contains information about the construction of an array from the bytes. 
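A minimal sketch of the subclassing contract described above, using a hypothetical no-op bytes-to-bytes codec (real codecs also carry a name and configuration through the Metadata machinery):

from dataclasses import dataclass

from zarr.abc.codec import BytesBytesCodec
from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import Buffer


@dataclass(frozen=True)
class NoopCodec(BytesBytesCodec):
    is_fixed_size = True

    def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
        return input_byte_length  # output is exactly as large as the input

    async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
        return chunk_data  # pass the bytes through unchanged

    async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer:
        return chunk_data

The batch-oriented decode/encode methods then come for free from BaseCodec, which fans _decode_single/_encode_single out over concurrent_map.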
+ + Returns + ------- + Iterable[NDBuffer | None] + """ + return await concurrent_map( + list(batch_info), + self._decode_partial_single, + config.get("async.concurrency"), + ) + + +class ArrayBytesCodecPartialEncodeMixin: + """Mixin for array-to-bytes codecs that implement partial encoding.""" + + async def _encode_partial_single( + self, + byte_setter: ByteSetter, + chunk_array: NDBuffer, + selection: SelectorTuple, + chunk_spec: ArraySpec, + ) -> None: + raise NotImplementedError + + async def encode_partial( + self, + batch_info: Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]], + ) -> None: + """Partially encodes a batch of chunks. + This method determines parts of a chunk from the slice selection, encodes them and + writes these parts to the store (via ByteSetter). + If merging with existing chunk data in the store is necessary, this method will + read from the store first and perform the merge. + + Parameters + ---------- + batch_info : Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]] + Ordered set of information about slices of to-be-encoded chunks. + The slice selection determines which parts of the chunk will be encoded. + The ByteSetter is used to write the necessary bytes and fetch bytes for existing chunk data. + The chunk spec contains information about the chunk. + """ + await concurrent_map( + list(batch_info), + self._encode_partial_single, + config.get("async.concurrency"), + ) + + +class CodecPipeline: + """Base class for implementing CodecPipeline. + A CodecPipeline implements the read and write paths for chunk data. + On the read path, it is responsible for fetching chunks from a store (via ByteGetter), + decoding them and assembling an output array. On the write path, it encodes the chunks + and writes them to a store (via ByteSetter).""" + + @abstractmethod + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + """Fills in codec configuration parameters that can be automatically + inferred from the array metadata. + + Parameters + ---------- + array_spec : ArraySpec + + Returns + ------- + Self + """ + ... + + @classmethod + @abstractmethod + def from_codecs(cls, codecs: Iterable[Codec]) -> Self: + """Creates a codec pipeline from an iterable of codecs. + + Parameters + ---------- + codecs : Iterable[Codec] + + Returns + ------- + Self + """ + ... + + @property + @abstractmethod + def supports_partial_decode(self) -> bool: ... + + @property + @abstractmethod + def supports_partial_encode(self) -> bool: ... + + @abstractmethod + def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + """Validates that all codec configurations are compatible with the array metadata. + Raises errors when a codec configuration is not compatible. + + Parameters + ---------- + shape: ChunkCoords + The array shape + dtype: np.dtype[Any] + The array data type + chunk_grid: ChunkGrid + The array chunk grid + """ + ... + + @abstractmethod + def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: + """Given an input byte length, this method returns the output byte length. + Raises a NotImplementedError for codecs with variable-sized outputs (e.g. compressors). + + Parameters + ---------- + input_byte_length : int + array_spec : ArraySpec + + Returns + ------- + int + """ + ... + + @abstractmethod + async def decode( + self, + chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + """Decodes a batch of chunks. 
+ Chunks can be None in which case they are ignored by the codec. + + Parameters + ---------- + chunks_and_specs : Iterable[tuple[Buffer | None, ArraySpec]] + Ordered set of encoded chunks with their accompanying chunk spec. + + Returns + ------- + Iterable[NDBuffer | None] + """ + ... + + @abstractmethod + async def encode( + self, + chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], + ) -> Iterable[Buffer | None]: + """Encodes a batch of chunks. + Chunks can be None in which case they are ignored by the codec. + + Parameters + ---------- + chunks_and_specs : Iterable[tuple[NDBuffer | None, ArraySpec]] + Ordered set of to-be-encoded chunks with their accompanying chunk spec. + + Returns + ------- + Iterable[Buffer | None] + """ + ... + + @abstractmethod + async def read( + self, + batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]], + out: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + """Reads chunk data from the store, decodes it and writes it into an output array. + Partial decoding may be utilized if the codecs and stores support it. + + Parameters + ---------- + batch_info : Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]] + Ordered set of information about the chunks. + The first slice selection determines which parts of the chunk will be fetched. + The second slice selection determines where in the output array the chunk data will be written. + The ByteGetter is used to fetch the necessary bytes. + The chunk spec contains information about the construction of an array from the bytes. + out : NDBuffer + """ + ... + + @abstractmethod + async def write( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + """Encodes chunk data and writes it to the store. + Merges with existing chunk data by reading first, if necessary. + Partial encoding may be utilized if the codecs and stores support it. + + Parameters + ---------- + batch_info : Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]] + Ordered set of information about the chunks. + The first slice selection determines which parts of the chunk will be encoded. + The second slice selection determines where in the value array the chunk data is located. + The ByteSetter is used to fetch and write the necessary bytes. + The chunk spec contains information about the chunk. + value : NDBuffer + """ + ... 
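For orientation, the batch calling convention shared by BaseCodec and CodecPipeline looks roughly like this from the caller's side; codec, buf and spec stand in for a concrete codec instance, an encoded chunk Buffer and its ArraySpec, and are not constructed here:

decoded = await codec.decode([(buf, spec), (None, spec)])
# -> e.g. [<NDBuffer>, None]; None entries are passed through untouched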
+ + +async def _batching_helper( + func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]], + batch_info: Iterable[tuple[CodecInput | None, ArraySpec]], +) -> list[CodecOutput | None]: + return await concurrent_map( + list(batch_info), + _noop_for_none(func), + config.get("async.concurrency"), + ) + + +def _noop_for_none( + func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]], +) -> Callable[[CodecInput | None, ArraySpec], Awaitable[CodecOutput | None]]: + async def wrap(chunk: CodecInput | None, chunk_spec: ArraySpec) -> CodecOutput | None: + if chunk is None: + return None + return await func(chunk, chunk_spec) + + return wrap diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py new file mode 100644 index 0000000000..291ceb459c --- /dev/null +++ b/src/zarr/abc/metadata.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.common import JSON + +from dataclasses import dataclass, fields + +__all__ = ["Metadata"] + + +@dataclass(frozen=True) +class Metadata: + def to_dict(self) -> dict[str, JSON]: + """ + Recursively serialize this model to a dictionary. + This method inspects the fields of self and calls `x.to_dict()` for any fields that + are instances of `Metadata`. Sequences of `Metadata` are similarly recursed into, and + the output of that recursion is collected in a list. + """ + out_dict = {} + for field in fields(self): + key = field.name + value = getattr(self, key) + if isinstance(value, Metadata): + out_dict[field.name] = getattr(self, field.name).to_dict() + elif isinstance(value, str): + out_dict[key] = value + elif isinstance(value, Sequence): + out_dict[key] = tuple(v.to_dict() if isinstance(v, Metadata) else v for v in value) + else: + out_dict[key] = value + + return out_dict + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + """ + Create an instance of the model from a dictionary + """ + ... + + return cls(**data) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py new file mode 100644 index 0000000000..85e335089d --- /dev/null +++ b/src/zarr/abc/store.py @@ -0,0 +1,425 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from asyncio import gather +from types import TracebackType +from typing import TYPE_CHECKING, NamedTuple, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable + from types import TracebackType + from typing import Any, Self, TypeAlias + + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import AccessModeLiteral, BytesLike + +__all__ = ["AccessMode", "ByteGetter", "ByteSetter", "Store", "set_or_delete"] + +ByteRangeRequest: TypeAlias = tuple[int | None, int | None] + + +class AccessMode(NamedTuple): + """Access mode flags.""" + + str: AccessModeLiteral + readonly: bool + overwrite: bool + create: bool + update: bool + + @classmethod + def from_literal(cls, mode: AccessModeLiteral) -> Self: + """ + Create an AccessMode instance from a literal. + + Parameters + ---------- + mode : AccessModeLiteral + One of 'r', 'r+', 'w', 'w-', 'a'. + + Returns + ------- + AccessMode + The created instance. + + Raises + ------ + ValueError + If mode is not one of 'r', 'r+', 'w', 'w-', 'a'. 
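A small sketch of the Metadata.to_dict recursion defined above; the Inner and Outer classes are hypothetical:

from dataclasses import dataclass

from zarr.abc.metadata import Metadata


@dataclass(frozen=True)
class Inner(Metadata):
    level: int


@dataclass(frozen=True)
class Outer(Metadata):
    name: str
    inner: Inner


print(Outer(name="example", inner=Inner(level=1)).to_dict())
# -> {'name': 'example', 'inner': {'level': 1}}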
+ """ + if mode in ("r", "r+", "a", "w", "w-"): + return cls( + str=mode, + readonly=mode == "r", + overwrite=mode == "w", + create=mode in ("a", "w", "w-"), + update=mode in ("r+", "a"), + ) + raise ValueError("mode must be one of 'r', 'r+', 'w', 'w-', 'a'") + + +class Store(ABC): + """ + Abstract base class for Zarr stores. + """ + + _mode: AccessMode + _is_open: bool + + def __init__(self, *args: Any, mode: AccessModeLiteral = "r", **kwargs: Any) -> None: + self._is_open = False + self._mode = AccessMode.from_literal(mode) + + @classmethod + async def open(cls, *args: Any, **kwargs: Any) -> Self: + """ + Create and open the store. + + Parameters + ---------- + *args : Any + Positional arguments to pass to the store constructor. + **kwargs : Any + Keyword arguments to pass to the store constructor. + + Returns + ------- + Store + The opened store instance. + """ + store = cls(*args, **kwargs) + await store._open() + return store + + def __enter__(self) -> Self: + """Enter a context manager that will close the store upon exiting.""" + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """Close the store.""" + self.close() + + async def _open(self) -> None: + """ + Open the store. + + Raises + ------ + ValueError + If the store is already open. + FileExistsError + If ``mode='w-'`` and the store already exists. + + Notes + ----- + * When ``mode='w'`` and the store already exists, it will be cleared. + """ + if self._is_open: + raise ValueError("store is already open") + if self.mode.str == "w": + await self.clear() + elif self.mode.str == "w-" and not await self.empty(): + raise FileExistsError("Store already exists") + self._is_open = True + + async def _ensure_open(self) -> None: + """Open the store if it is not already open.""" + if not self._is_open: + await self._open() + + @abstractmethod + async def empty(self) -> bool: + """ + Check if the store is empty. + + Returns + ------- + bool + True if the store is empty, False otherwise. + """ + ... + + @abstractmethod + async def clear(self) -> None: + """ + Clear the store. + + Remove all keys and values from the store. + """ + ... + + @abstractmethod + def with_mode(self, mode: AccessModeLiteral) -> Self: + """ + Return a new store of the same type pointing to the same location with a new mode. + + The returned Store is not automatically opened. Call :meth:`Store.open` before + using. + + Parameters + ---------- + mode: AccessModeLiteral + The new mode to use. + + Returns + ------- + store: + A new store of the same type with the new mode. + + Examples + -------- + >>> writer = zarr.store.MemoryStore(mode="w") + >>> reader = writer.with_mode("r") + """ + ... + + @property + def mode(self) -> AccessMode: + """Access mode of the store.""" + return self._mode + + def _check_writable(self) -> None: + """Raise an exception if the store is not writable.""" + if self.mode.readonly: + raise ValueError("store mode does not support writing") + + @abstractmethod + def __eq__(self, value: object) -> bool: + """Equality comparison.""" + ... + + @abstractmethod + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: ByteRangeRequest | None = None, + ) -> Buffer | None: + """Retrieve the value associated with a given key. + + Parameters + ---------- + key : str + byte_range : tuple[int | None, int | None], optional + + Returns + ------- + Buffer + """ + ... 
+ + @abstractmethod + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + """Retrieve possibly partial values from given key_ranges. + + Parameters + ---------- + key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]] + Ordered set of key, range pairs, a key may occur multiple times with different ranges + + Returns + ------- + list of values, in the order of the key_ranges, may contain null/none for missing keys + """ + ... + + @abstractmethod + async def exists(self, key: str) -> bool: + """Check if a key exists in the store. + + Parameters + ---------- + key : str + + Returns + ------- + bool + """ + ... + + @property + @abstractmethod + def supports_writes(self) -> bool: + """Does the store support writes?""" + ... + + @abstractmethod + async def set(self, key: str, value: Buffer) -> None: + """Store a (key, value) pair. + + Parameters + ---------- + key : str + value : Buffer + """ + ... + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + """ + Store a key to ``value`` if the key is not already present. + + Parameters + ---------- + key : str + value : Buffer + """ + # Note for implementers: the default implementation provided here + # is not safe for concurrent writers. There's a race condition between + # the `exists` check and the `set` where another writer could set some + # value at `key` or delete `key`. + if not await self.exists(key): + await self.set(key, value) + + async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: + """ + Insert multiple (key, value) pairs into storage. + """ + await gather(*(self.set(key, value) for key, value in values)) + return + + @property + @abstractmethod + def supports_deletes(self) -> bool: + """Does the store support deletes?""" + ... + + @abstractmethod + async def delete(self, key: str) -> None: + """Remove a key from the store + + Parameters + ---------- + key : str + """ + ... + + @property + @abstractmethod + def supports_partial_writes(self) -> bool: + """Does the store support partial writes?""" + ... + + @abstractmethod + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: + """Store values at a given key, starting at byte range_start. + + Parameters + ---------- + key_start_values : list[tuple[str, int, BytesLike]] + set of key, range_start, values triples, a key may occur multiple times with different + range_starts, range_starts (considering the length of the respective values) must not + specify overlapping ranges for the same key + """ + ... + + @property + @abstractmethod + def supports_listing(self) -> bool: + """Does the store support listing?""" + ... + + @abstractmethod + def list(self) -> AsyncGenerator[str, None]: + """Retrieve all keys in the store. + + Returns + ------- + AsyncGenerator[str, None] + """ + ... + + @abstractmethod + def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + """ + Retrieve all keys in the store that begin with a given prefix. Keys are returned with the + common leading prefix removed. + + Parameters + ---------- + prefix : str + + Returns + ------- + AsyncGenerator[str, None] + """ + ... + + @abstractmethod + def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + """ + Retrieve all keys and prefixes with a given prefix and which do not contain the character + “/” after the given prefix. 
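A sketch of the list_prefix contract stated above, assuming store is some opened concrete Store holding the keys "foo/zarr.json" and "foo/c/0/0" (names hypothetical):

keys = [k async for k in store.list_prefix("foo/")]
# -> ["zarr.json", "c/0/0"], order not guaranteed; the leading "foo/" is stripped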
+ + Parameters + ---------- + prefix : str + + Returns + ------- + AsyncGenerator[str, None] + """ + ... + + def close(self) -> None: + """Close the store.""" + self._is_open = False + + async def _get_many( + self, requests: Iterable[tuple[str, BufferPrototype, ByteRangeRequest | None]] + ) -> AsyncGenerator[tuple[str, Buffer | None], None]: + """ + Retrieve a collection of objects from storage. In general this method does not guarantee + that objects will be retrieved in the order in which they were requested, so this method + yields tuple[str, Buffer | None] instead of just Buffer | None + """ + for req in requests: + yield (req[0], await self.get(*req)) + + +@runtime_checkable +class ByteGetter(Protocol): + async def get( + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None + ) -> Buffer | None: ... + + +@runtime_checkable +class ByteSetter(Protocol): + async def get( + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None + ) -> Buffer | None: ... + + async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: ... + + async def delete(self) -> None: ... + + async def set_if_not_exists(self, default: Buffer) -> None: ... + + +async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None: + """Set or delete a value in a byte setter + + Parameters + ---------- + byte_setter : ByteSetter + value : Buffer | None + + Notes + ----- + If value is None, the key will be deleted. + """ + if value is None: + await byte_setter.delete() + else: + await byte_setter.set(value) diff --git a/zarr/tests/__init__.py b/src/zarr/api/__init__.py similarity index 100% rename from zarr/tests/__init__.py rename to src/zarr/api/__init__.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py new file mode 100644 index 0000000000..559049ae4f --- /dev/null +++ b/src/zarr/api/asynchronous.py @@ -0,0 +1,1157 @@ +from __future__ import annotations + +import asyncio +import dataclasses +import warnings +from typing import TYPE_CHECKING, Any, Literal, cast + +import numpy as np +import numpy.typing as npt + +from zarr.abc.store import Store +from zarr.core.array import Array, AsyncArray, get_array_metadata +from zarr.core.common import ( + JSON, + AccessModeLiteral, + ChunkCoords, + MemoryOrder, + ZarrFormat, +) +from zarr.core.config import config +from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata +from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata +from zarr.errors import NodeTypeValidationError +from zarr.storage import ( + StoreLike, + StorePath, + make_store_path, +) + +if TYPE_CHECKING: + from collections.abc import Iterable + + from zarr.abc.codec import Codec + from zarr.core.buffer import NDArrayLike + from zarr.core.chunk_key_encodings import ChunkKeyEncoding + + # TODO: this type could use some more thought + ArrayLike = AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array | npt.NDArray[Any] + PathLike = str + +__all__ = [ + "array", + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "create", + "empty", + "empty_like", + "full", + "full_like", + "group", + "load", + "ones", + "ones_like", + "open", + "open_array", + "open_consolidated", + "open_group", + "open_like", + "save", + "save_array", + "save_group", + "tree", + "zeros", + "zeros_like", +] + + +def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]: + """helper function to get the shape and chunks from an 
array-like object""" + shape = None + chunks = None + + if hasattr(a, "shape") and isinstance(a.shape, tuple): + shape = a.shape + + if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): + chunks = a.chunks + + elif hasattr(a, "chunklen"): + # bcolz carray + chunks = (a.chunklen,) + a.shape[1:] + + return shape, chunks + + +def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]: + """set default values for shape and chunks if they are not present in the array-like object""" + + new = kwargs.copy() + + shape, chunks = _get_shape_chunks(a) + if shape is not None: + new["shape"] = shape + if chunks is not None: + new["chunks"] = chunks + + if hasattr(a, "dtype"): + new["dtype"] = a.dtype + + if isinstance(a, AsyncArray): + new["order"] = a.order + if isinstance(a.metadata, ArrayV2Metadata): + new["compressor"] = a.metadata.compressor + new["filters"] = a.metadata.filters + else: + # TODO: Remove type: ignore statement when type inference improves. + # mypy cannot correctly infer the type of a.metadata here for some reason. + new["codecs"] = a.metadata.codecs # type: ignore[unreachable] + + else: + # TODO: set default values compressor/codecs + # to do this, we may need to evaluate if this is a v2 or v3 array + # new["compressor"] = "default" + pass + + return new + + +def _handle_zarr_version_or_format( + *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None +) -> ZarrFormat | None: + """handle the deprecated zarr_version kwarg and return zarr_format""" + if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version: + raise ValueError( + f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" + ) + if zarr_version is not None: + warnings.warn( + "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2 + ) + return zarr_version + return zarr_format + + +def _default_zarr_version() -> ZarrFormat: + """return the default zarr_version""" + return cast(ZarrFormat, int(config.get("default_zarr_version", 3))) + + +async def consolidate_metadata( + store: StoreLike, + path: str | None = None, + zarr_format: ZarrFormat | None = None, +) -> AsyncGroup: + """ + Consolidate the metadata of all nodes in a hierarchy. + + Upon completion, the metadata of the root node in the Zarr hierarchy will be + updated to include all the metadata of child nodes. + + Parameters + ---------- + store: StoreLike + The store-like object whose metadata you wish to consolidate. + path: str, optional + A path to a group in the store to consolidate at. Only children + below that group will be consolidated. + + By default, the root node is used so all the metadata in the + store is consolidated. + zarr_format : {2, 3, None}, optional + The zarr format of the hierarchy. By default the zarr format + is inferred. + + Returns + ------- + group: AsyncGroup + The group, with the ``consolidated_metadata`` field set to include + the metadata of each child node. 
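The deprecation shim defined above is shared by the public functions below; its behaviour can be read straight off the code (a private helper, called here only for illustration):

from zarr.api.asynchronous import _handle_zarr_version_or_format

_handle_zarr_version_or_format(zarr_version=None, zarr_format=3)  # -> 3
_handle_zarr_version_or_format(zarr_version=2, zarr_format=None)  # -> 2, with a DeprecationWarning
_handle_zarr_version_or_format(zarr_version=2, zarr_format=3)     # raises ValueError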
+ """ + store_path = await make_store_path(store) + + if path is not None: + store_path = store_path / path + + group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False) + group.store_path.store._check_writable() + + members_metadata = {k: v.metadata async for k, v in group.members(max_depth=None)} + + # While consolidating, we want to be explicit about when child groups + # are empty by inserting an empty dict for consolidated_metadata.metadata + for k, v in members_metadata.items(): + if isinstance(v, GroupMetadata) and v.consolidated_metadata is None: + v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={})) + members_metadata[k] = v + + ConsolidatedMetadata._flat_to_nested(members_metadata) + + consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata) + metadata = dataclasses.replace(group.metadata, consolidated_metadata=consolidated_metadata) + group = dataclasses.replace( + group, + metadata=metadata, + ) + await group._save_metadata() + return group + + +async def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + raise NotImplementedError + + +async def load( + *, + store: StoreLike, + path: str | None = None, + zarr_format: ZarrFormat | None = None, + zarr_version: ZarrFormat | None = None, +) -> NDArrayLike | dict[str, NDArrayLike]: + """Load data from an array or group into memory. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + path : str or None, optional + The path within the store from which to load. + + Returns + ------- + out + If the path contains an array, out will be a numpy array. If the path contains + a group, out will be a dict-like object where keys are array names and values + are numpy arrays. + + See Also + -------- + save, savez + + Notes + ----- + If loading data from a group of arrays, data will not be immediately loaded into + memory. Rather, arrays will be loaded into memory as they are requested. + """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + obj = await open(store=store, path=path, zarr_format=zarr_format) + if isinstance(obj, AsyncArray): + return await obj.getitem(slice(None)) + else: + raise NotImplementedError("loading groups not yet supported") + + +async def open( + *, + store: StoreLike | None = None, + mode: AccessModeLiteral | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to open_array +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup: + """Convenience function to open a group or array using file-mode-like semantics. + + Parameters + ---------- + store : Store or str, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. 
+ path : str or None, optional + The path within the store to open. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + Additional parameters are passed through to :func:`zarr.creation.open_array` or + :func:`zarr.hierarchy.open_group`. + + Returns + ------- + z : array or group + Return type depends on what exists in the given store. + """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) + + if path is not None: + store_path = store_path / path + + if "shape" not in kwargs and mode in {"a", "w", "w-"}: + try: + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + # TODO: remove this cast when we fix typing for array metadata dicts + _metadata_dict = cast(ArrayMetadataDict, metadata_dict) + # for v2, the above would already have raised an exception if not an array + zarr_format = _metadata_dict["zarr_format"] + is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array" + if is_v3_array or zarr_format == 2: + return AsyncArray(store_path=store_path, metadata=_metadata_dict) + except (AssertionError, FileNotFoundError): + pass + return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + + try: + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) + except (KeyError, NodeTypeValidationError): + # KeyError for a missing key + # NodeTypeValidationError for failing to parse node metadata as an array when it's + # actually a group + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) + + +async def open_consolidated( + *args: Any, use_consolidated: Literal[True] = True, **kwargs: Any +) -> AsyncGroup: + """ + Alias for :func:`open_group` with ``use_consolidated=True``. + """ + if use_consolidated is not True: + raise TypeError( + "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with " + "'use_consolidated=False' to bypass consolidated metadata." + ) + return await open_group(*args, use_consolidated=use_consolidated, **kwargs) + + +async def save( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to save +) -> None: + """Convenience function to save an array or group of arrays to the local file system. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the group where the arrays will be saved. + kwargs + NumPy arrays with data to save. 
+ """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + if len(args) == 0 and len(kwargs) == 0: + raise ValueError("at least one array must be provided") + if len(args) == 1 and len(kwargs) == 0: + await save_array(store, args[0], zarr_format=zarr_format, path=path) + else: + await save_group(store, *args, zarr_format=zarr_format, path=path, **kwargs) + + +async def save_array( + store: StoreLike, + arr: NDArrayLike, + *, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to create +) -> None: + """Convenience function to save a NumPy array to the local file system, following a + similar API to the NumPy save() function. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + arr : ndarray + NumPy array with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store where the array will be saved. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + kwargs + Passed through to :func:`create`, e.g., compressor. + """ + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) + + mode = kwargs.pop("mode", None) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) + if path is not None: + store_path = store_path / path + new = await AsyncArray.create( + store_path, + zarr_format=zarr_format, + shape=arr.shape, + dtype=arr.dtype, + chunks=arr.shape, + **kwargs, + ) + await new.setitem(slice(None), arr) + + +async def save_group( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: NDArrayLike, +) -> None: + """Convenience function to save several NumPy arrays to the local file system, following a + similar API to the NumPy savez()/savez_compressed() functions. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + Path within the store where the group will be saved. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + kwargs + NumPy arrays with data to save. 
+ """ + zarr_format = ( + _handle_zarr_version_or_format( + zarr_version=zarr_version, + zarr_format=zarr_format, + ) + or _default_zarr_version() + ) + + if len(args) == 0 and len(kwargs) == 0: + raise ValueError("at least one array must be provided") + aws = [] + for i, arr in enumerate(args): + aws.append( + save_array( + store, + arr, + zarr_format=zarr_format, + path=f"{path}/arr_{i}", + storage_options=storage_options, + ) + ) + for k, arr in kwargs.items(): + _path = f"{path}/{k}" if path is not None else k + aws.append( + save_array( + store, arr, zarr_format=zarr_format, path=_path, storage_options=storage_options + ) + ) + await asyncio.gather(*aws) + + +async def tree(*args: Any, **kwargs: Any) -> None: + raise NotImplementedError + + +async def array( + data: npt.ArrayLike, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array filled with `data`. + + Parameters + ---------- + data : array_like + The data to fill the array with. + kwargs + Passed through to :func:`create`. + + Returns + ------- + array : array + The new array. + """ + + # ensure data is array-like + if not hasattr(data, "shape") or not hasattr(data, "dtype"): + data = np.asanyarray(data) + + # setup dtype + kw_dtype = kwargs.get("dtype") + if kw_dtype is None: + kwargs["dtype"] = data.dtype + else: + kwargs["dtype"] = kw_dtype + + # setup shape and chunks + data_shape, data_chunks = _get_shape_chunks(data) + kwargs["shape"] = data_shape + kw_chunks = kwargs.get("chunks") + if kw_chunks is None: + kwargs["chunks"] = data_chunks + else: + kwargs["chunks"] = kw_chunks + + read_only = kwargs.pop("read_only", False) + if read_only: + raise ValueError("read_only=True is no longer supported when creating new arrays") + + # instantiate array + z = await create(**kwargs) + + # fill with data + await z.setitem(slice(None), data) + + return z + + +async def group( + *, # Note: this is a change from v2 + store: StoreLike | None = None, + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used + cache_attrs: bool | None = None, # not used, default changed + synchronizer: Any | None = None, # not used + path: str | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used + attributes: dict[str, JSON] | None = None, + storage_options: dict[str, Any] | None = None, +) -> AsyncGroup: + """Create a group. + + Parameters + ---------- + store : Store or str, optional + Store or path to directory in file system. + overwrite : bool, optional + If True, delete any pre-existing data in `store` at `path` before + creating the group. + chunk_store : Store, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : str, optional + Group path within store. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. 
+ + Returns + ------- + g : group + The new group. + """ + + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) + if path is not None: + store_path = store_path / path + + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + + if attributes is None: + attributes = {} + + try: + return await AsyncGroup.open(store=store_path, zarr_format=zarr_format) + except (KeyError, FileNotFoundError): + return await AsyncGroup.from_store( + store=store_path, + zarr_format=zarr_format or _default_zarr_version(), + exists_ok=overwrite, + attributes=attributes, + ) + + +async def open_group( + store: StoreLike | None = None, + *, # Note: this is a change from v2 + mode: AccessModeLiteral | None = None, + cache_attrs: bool | None = None, # not used, default changed + synchronizer: Any = None, # not used + path: str | None = None, + chunk_store: StoreLike | None = None, # not used + storage_options: dict[str, Any] | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used + attributes: dict[str, JSON] | None = None, + use_consolidated: bool | str | None = None, +) -> AsyncGroup: + """Open a group using file-mode-like semantics. + + Parameters + ---------- + store : Store, str, or mapping, optional + Store or path to directory in file system or name of zip file. + + Strings are interpreted as paths on the local file system + and used as the ``root`` argument to :class:`zarr.store.LocalStore`. + + Dictionaries are used as the ``store_dict`` argument in + :class:`zarr.store.MemoryStore``. + + By default (``store=None``) a new :class:`zarr.store.MemoryStore` + is created. + + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : str, optional + Group path within store. + chunk_store : Store or str, optional + Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + attributes : dict + A dictionary of JSON-serializable values with user-defined attributes. + use_consolidated : bool or str, default None + Whether to use consolidated metadata. 
+ + By default, consolidated metadata is used if it's present in the + store (in the ``zarr.json`` for Zarr v3 and in the ``.zmetadata`` file + for Zarr v2). + + To explicitly require consolidated metadata, set ``use_consolidated=True``, + which will raise an exception if consolidated metadata is not found. + + To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, + which will fall back to using the regular, non consolidated metadata. + + Zarr v2 allowed configuring the key storing the consolidated metadata + (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` + to load consolidated metadata from a non-default key. + + Returns + ------- + g : group + The new group. + """ + + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) + if path is not None: + store_path = store_path / path + + if attributes is None: + attributes = {} + + try: + return await AsyncGroup.open( + store_path, zarr_format=zarr_format, use_consolidated=use_consolidated + ) + except (KeyError, FileNotFoundError): + return await AsyncGroup.from_store( + store_path, + zarr_format=zarr_format or _default_zarr_version(), + exists_ok=True, + attributes=attributes, + ) + + +async def create( + shape: ChunkCoords, + *, # Note: this is a change from v2 + chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True + dtype: npt.DTypeLike | None = None, + compressor: dict[str, JSON] | None = None, # TODO: default and type change + fill_value: Any | None = 0, # TODO: need type + order: MemoryOrder | None = None, # TODO: default change + store: str | StoreLike | None = None, + synchronizer: Any | None = None, + overwrite: bool = False, + path: PathLike | None = None, + chunk_store: StoreLike | None = None, + filters: list[dict[str, JSON]] | None = None, # TODO: type has changed + cache_metadata: bool | None = None, + cache_attrs: bool | None = None, + read_only: bool | None = None, + object_codec: Codec | None = None, # TODO: type has changed + dimension_separator: Literal[".", "/"] | None = None, + write_empty_chunks: bool = False, # TODO: default has changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # TODO: need type + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: Any, +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array. + + Parameters + ---------- + shape : int or tuple of ints + Array shape. + chunks : int or tuple of ints, optional + Chunk shape. If True, will be guessed from `shape` and `dtype`. 
If
+ False, will be set to `shape`, i.e., single chunk for the whole array.
+ If an int, the chunk size in each dimension will be given by the value
+ of `chunks`. Default is True.
+ dtype : str or dtype, optional
+ NumPy dtype.
+ compressor : Codec, optional
+ Primary compressor.
+ fill_value : object
+ Default value to use for uninitialized portions of the array.
+ order : {'C', 'F'}, optional
+ Memory layout to be used within each chunk.
+ store : Store or str
+ Store or path to directory in file system or name of zip file.
+ synchronizer : object, optional
+ Array synchronizer.
+ overwrite : bool, optional
+ If True, delete all pre-existing data in `store` at `path` before
+ creating the array.
+ path : str, optional
+ Path under which array is stored.
+ chunk_store : MutableMapping, optional
+ Separate storage for chunks. If not provided, `store` will be used
+ for storage of both chunks and metadata.
+ filters : sequence of Codecs, optional
+ Sequence of filters to use to encode chunk data prior to compression.
+ cache_metadata : bool, optional
+ If True, array configuration metadata will be cached for the
+ lifetime of the object. If False, array metadata will be reloaded
+ prior to all data access and modification operations (may incur
+ overhead depending on storage and data access pattern).
+ cache_attrs : bool, optional
+ If True (default), user attributes will be cached for attribute read
+ operations. If False, user attributes are reloaded from the store prior
+ to all attribute read operations.
+ read_only : bool, optional
+ True if array should be protected against modification.
+ object_codec : Codec, optional
+ A codec to encode object arrays, only needed if dtype=object.
+ dimension_separator : {'.', '/'}, optional
+ Separator placed between the dimensions of a chunk.
+
+ .. versionadded:: 2.8
+
+ write_empty_chunks : bool, optional
+ If True (default), all chunks will be stored regardless of their
+ contents. If False, each chunk is compared to the array's fill value
+ prior to storing. If a chunk is uniformly equal to the fill value, then
+ that chunk is not stored, and the store entry for that chunk's key
+ is deleted. This setting enables sparser storage, as only chunks with
+ non-fill-value data are stored, at the expense of overhead associated
+ with checking the data of each chunk.
+
+ .. versionadded:: 2.11
+
+ zarr_format : {2, 3, None}, optional
+ The zarr format to use when saving.
+ meta_array : array-like, optional
+ An array instance to use for determining arrays to create and return
+ to users. Use `numpy.empty(())` by default.
+
+ .. versionadded:: 2.13
+ storage_options : dict
+ If using an fsspec URL to create the store, these will be passed to
+ the backend implementation. Ignored otherwise.
+
+ Returns
+ -------
+ z : array
+ The array.
+ """ + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) + + if zarr_format == 2 and chunks is None: + chunks = shape + elif zarr_format == 3 and chunk_shape is None: + if chunks is not None: + chunk_shape = chunks + chunks = None + else: + chunk_shape = shape + + if order is not None: + warnings.warn( + "order is deprecated, use config `array.order` instead", + DeprecationWarning, + stacklevel=2, + ) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_metadata is not None: + warnings.warn("cache_metadata is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if object_codec is not None: + warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) + if dimension_separator is not None: + if zarr_format == 3: + raise ValueError( + "dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead" + ) + else: + warnings.warn( + "dimension_separator is not yet implemented", + RuntimeWarning, + stacklevel=2, + ) + if write_empty_chunks: + warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + + mode = kwargs.pop("mode", None) + if mode is None: + if not isinstance(store, Store | StorePath): + mode = "a" + + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) + if path is not None: + store_path = store_path / path + + return await AsyncArray.create( + store_path, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + exists_ok=overwrite, # TODO: name change + filters=filters, + dimension_separator=dimension_separator, + zarr_format=zarr_format, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + attributes=attributes, + **kwargs, + ) + + +async def empty( + shape: ChunkCoords, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an empty array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Notes + ----- + The contents of an empty Zarr array are not defined. On attempting to + retrieve data from an empty Zarr array, any values may be returned, + and these are not guaranteed to be stable from one access to the next. + """ + return await create(shape=shape, fill_value=None, **kwargs) + + +async def empty_like( + a: ArrayLike, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an empty array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. 
+ """ + like_kwargs = _like_args(a, kwargs) + return await empty(**like_kwargs) + + +# TODO: add type annotations for fill_value and kwargs +async def full( + shape: ChunkCoords, fill_value: Any, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array, with `fill_value` being used as the default value for + uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + fill_value : scalar + Fill value. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + return await create(shape=shape, fill_value=fill_value, **kwargs) + + +# TODO: add type annotations for kwargs +async def full_like( + a: ArrayLike, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create a filled array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + like_kwargs = _like_args(a, kwargs) + if isinstance(a, AsyncArray): + like_kwargs.setdefault("fill_value", a.metadata.fill_value) + return await full(**like_kwargs) + + +async def ones( + shape: ChunkCoords, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array, with one being used as the default value for + uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + return await create(shape=shape, fill_value=1, **kwargs) + + +async def ones_like( + a: ArrayLike, **kwargs: Any +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array of ones like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + like_kwargs = _like_args(a, kwargs) + return await ones(**like_kwargs) + + +async def open_array( + *, # note: this is a change from v2 + store: StoreLike | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: PathLike | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to save +) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Open an array using file-mode-like semantics. + + Parameters + ---------- + store : Store or str + Store or path to directory in file system or name of zip file. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str, optional + Path in store to array. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + **kwargs + Any keyword arguments to pass to the array constructor. + + Returns + ------- + AsyncArray + The opened array. 
+ """
+
+ mode = kwargs.pop("mode", None)
+ store_path = await make_store_path(store, mode=mode)
+ if path is not None:
+ store_path = store_path / path
+
+ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
+
+ try:
+ return await AsyncArray.open(store_path, zarr_format=zarr_format)
+ except FileNotFoundError:
+ if store_path.store.mode.create:
+ return await create(
+ store=store_path,
+ zarr_format=zarr_format or _default_zarr_version(),
+ overwrite=store_path.store.mode.overwrite,
+ **kwargs,
+ )
+ raise
+
+
+async def open_like(
+ a: ArrayLike, path: str, **kwargs: Any
+) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
+ """Open a persistent array like `a`.
+
+ Parameters
+ ----------
+ a : Array
+ The shape and data-type of `a` define these same attributes of the returned array.
+ path : str
+ The path to the new array.
+ **kwargs
+ Any keyword arguments to pass to the array constructor.
+
+ Returns
+ -------
+ AsyncArray
+ The opened array.
+ """
+ like_kwargs = _like_args(a, kwargs)
+ if isinstance(a, AsyncArray | Array):
+ like_kwargs.setdefault("fill_value", a.metadata.fill_value)
+ return await open_array(path=path, **like_kwargs)
+
+
+async def zeros(
+ shape: ChunkCoords, **kwargs: Any
+) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
+ """Create an array, with zero being used as the default value for
+ uninitialized portions of the array.
+
+ Parameters
+ ----------
+ shape : int or tuple of int
+ Shape of the empty array.
+ **kwargs
+ Keyword arguments passed to :func:`zarr.api.asynchronous.create`.
+
+ Returns
+ -------
+ Array
+ The new array.
+ """
+ return await create(shape=shape, fill_value=0, **kwargs)
+
+
+async def zeros_like(
+ a: ArrayLike, **kwargs: Any
+) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
+ """Create an array of zeros like `a`.
+
+ Parameters
+ ----------
+ a : array-like
+ The array to create an empty array like.
+ **kwargs
+ Keyword arguments passed to :func:`zarr.api.asynchronous.create`.
+
+ Returns
+ -------
+ Array
+ The new array.
+ """ + like_kwargs = _like_args(a, kwargs) + return await zeros(**like_kwargs) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py new file mode 100644 index 0000000000..9dcd6fe2d5 --- /dev/null +++ b/src/zarr/api/synchronous.py @@ -0,0 +1,316 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +import zarr.api.asynchronous as async_api +from zarr._compat import _deprecate_positional_args +from zarr.core.array import Array, AsyncArray +from zarr.core.group import Group +from zarr.core.sync import sync + +if TYPE_CHECKING: + from zarr.core.buffer import NDArrayLike + from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat + from zarr.storage import StoreLike + +__all__ = [ + "array", + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "create", + "empty", + "empty_like", + "full", + "full_like", + "group", + "load", + "ones", + "ones_like", + "open", + "open_array", + "open_consolidated", + "open_group", + "open_like", + "save", + "save_array", + "save_group", + "tree", + "zeros", + "zeros_like", +] + + +def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: + return Group(sync(async_api.consolidate_metadata(*args, **kwargs))) + + +def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy(*args, **kwargs)) + + +def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy_all(*args, **kwargs)) + + +def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy_store(*args, **kwargs)) + + +def load( + store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None +) -> NDArrayLike | dict[str, NDArrayLike]: + return sync(async_api.load(store=store, zarr_version=zarr_version, path=path)) + + +@_deprecate_positional_args +def open( + store: StoreLike | None = None, + *, + mode: AccessModeLiteral | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.open +) -> Array | Group: + obj = sync( + async_api.open( + store=store, + mode=mode, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + if isinstance(obj, AsyncArray): + return Array(obj) + else: + return Group(obj) + + +def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group: + return Group( + sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs)) + ) + + +def save( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save +) -> None: + return sync( + async_api.save( + store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs + ) + ) + + +@_deprecate_positional_args +def save_array( + store: StoreLike, + arr: NDArrayLike, + *, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array +) -> None: + return sync( + async_api.save_array( + store=store, + arr=arr, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + + +def save_group( + store: StoreLike, + *args: NDArrayLike, + 
zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + storage_options: dict[str, Any] | None = None, + **kwargs: NDArrayLike, +) -> None: + return sync( + async_api.save_group( + store, + *args, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + storage_options=storage_options, + **kwargs, + ) + ) + + +def tree(*args: Any, **kwargs: Any) -> None: + return sync(async_api.tree(*args, **kwargs)) + + +# TODO: add type annotations for kwargs +def array(data: NDArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.array(data=data, **kwargs))) + + +@_deprecate_positional_args +def group( + store: StoreLike | None = None, + *, # Note: this is a change from v2 + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used in async_api + cache_attrs: bool | None = None, # default changed, not used in async_api + synchronizer: Any | None = None, # not used in async_api + path: str | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async_api + attributes: dict[str, JSON] | None = None, +) -> Group: + return Group( + sync( + async_api.group( + store=store, + overwrite=overwrite, + chunk_store=chunk_store, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + attributes=attributes, + ) + ) + ) + + +@_deprecate_positional_args +def open_group( + store: StoreLike | None = None, + *, # Note: this is a change from v2 + mode: AccessModeLiteral | None = None, # not used in async api + cache_attrs: bool | None = None, # default changed, not used in async api + synchronizer: Any = None, # not used in async api + path: str | None = None, + chunk_store: StoreLike | None = None, # not used in async api + storage_options: dict[str, Any] | None = None, # not used in async api + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async api + attributes: dict[str, JSON] | None = None, + use_consolidated: bool | str | None = None, +) -> Group: + return Group( + sync( + async_api.open_group( + store=store, + mode=mode, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + chunk_store=chunk_store, + storage_options=storage_options, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + attributes=attributes, + use_consolidated=use_consolidated, + ) + ) + ) + + +# TODO: add type annotations for kwargs +def create(*args: Any, **kwargs: Any) -> Array: + return Array(sync(async_api.create(*args, **kwargs))) + + +# TODO: add type annotations for kwargs +def empty(shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(sync(async_api.empty(shape, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.empty_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs and fill_value +def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> Array: + return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.full_like(a, **kwargs))) + + 
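All of the wrappers in this module follow the same pattern: call the corresponding coroutine in zarr.api.asynchronous, drive it to completion with sync(), and re-wrap the result in the synchronous Array or Group class. The snippet below is a minimal usage sketch of the resulting public API, added here for orientation only and not part of the patch; it assumes the top-level zarr namespace re-exports these wrappers (as in released versions), the store path is hypothetical, and store/mode defaults may differ on this development branch.

import numpy as np
import zarr

data = np.arange(12).reshape(3, 4)
# sync wrapper around zarr.api.asynchronous.save_array
zarr.save_array("example.zarr", data)
# open() returns an Array or a Group depending on what the store contains
z = zarr.open("example.zarr", mode="r")
assert isinstance(z, zarr.Array)
np.testing.assert_array_equal(z[:], data)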
+
+# TODO: add type annotations for kwargs
+def ones(shape: ChunkCoords, **kwargs: Any) -> Array:
+ return Array(sync(async_api.ones(shape, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def ones_like(a: async_api.ArrayLike, **kwargs: Any) -> Array:
+ return Array(sync(async_api.ones_like(a, **kwargs)))
+
+
+# TODO: update this once async_api.open_array is fully implemented
+def open_array(*args: Any, **kwargs: Any) -> Array:
+ return Array(sync(async_api.open_array(*args, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def open_like(a: async_api.ArrayLike, **kwargs: Any) -> Array:
+ return Array(sync(async_api.open_like(a, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def zeros(*args: Any, **kwargs: Any) -> Array:
+ return Array(sync(async_api.zeros(*args, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def zeros_like(a: async_api.ArrayLike, **kwargs: Any) -> Array:
+ return Array(sync(async_api.zeros_like(a, **kwargs)))
+
+
+consolidate_metadata.__doc__ = async_api.consolidate_metadata.__doc__
+copy.__doc__ = async_api.copy.__doc__
+copy_all.__doc__ = async_api.copy_all.__doc__
+copy_store.__doc__ = async_api.copy_store.__doc__
+load.__doc__ = async_api.load.__doc__
+open.__doc__ = async_api.open.__doc__
+open_consolidated.__doc__ = async_api.open_consolidated.__doc__
+save.__doc__ = async_api.save.__doc__
+save_array.__doc__ = async_api.save_array.__doc__
+save_group.__doc__ = async_api.save_group.__doc__
+tree.__doc__ = async_api.tree.__doc__
+array.__doc__ = async_api.array.__doc__
+group.__doc__ = async_api.group.__doc__
+open_group.__doc__ = async_api.open_group.__doc__
+create.__doc__ = async_api.create.__doc__
+empty.__doc__ = async_api.empty.__doc__
+empty_like.__doc__ = async_api.empty_like.__doc__
+full.__doc__ = async_api.full.__doc__
+full_like.__doc__ = async_api.full_like.__doc__
+ones.__doc__ = async_api.ones.__doc__
+ones_like.__doc__ = async_api.ones_like.__doc__
+open_array.__doc__ = async_api.open_array.__doc__
+open_like.__doc__ = async_api.open_like.__doc__
+zeros.__doc__ = async_api.zeros.__doc__
+zeros_like.__doc__ = async_api.zeros_like.__doc__
diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
new file mode 100644
index 0000000000..dc6c3f9154
--- /dev/null
+++ b/src/zarr/codecs/__init__.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ import numpy as np
+
+from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
+from zarr.codecs.bytes import BytesCodec, Endian
+from zarr.codecs.crc32c_ import Crc32cCodec
+from zarr.codecs.gzip import GzipCodec
+from zarr.codecs.pipeline import BatchedCodecPipeline
+from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
+from zarr.codecs.transpose import TransposeCodec
+from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
+from zarr.codecs.zstd import ZstdCodec
+from zarr.core.metadata.v3 import DataType
+
+__all__ = [
+ "BatchedCodecPipeline",
+ "BloscCname",
+ "BloscCodec",
+ "BloscShuffle",
+ "BytesCodec",
+ "Crc32cCodec",
+ "Endian",
+ "GzipCodec",
+ "ShardingCodec",
+ "ShardingCodecIndexLocation",
+ "TransposeCodec",
+ "VLenBytesCodec",
+ "VLenUTF8Codec",
+ "ZstdCodec",
+]
+
+
+def _get_default_array_bytes_codec(
+ np_dtype: np.dtype[Any],
+) -> BytesCodec | VLenUTF8Codec | VLenBytesCodec:
+ dtype = DataType.from_numpy(np_dtype)
+ if dtype == DataType.string:
+ return VLenUTF8Codec()
+ elif dtype == DataType.bytes:
+ return VLenBytesCodec()
+ else:
+ return BytesCodec()
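The _get_default_array_bytes_codec helper above picks the default array-to-bytes codec from the resolved v3 data type: dtypes mapping to DataType.string get VLenUTF8Codec, those mapping to DataType.bytes get VLenBytesCodec, and every fixed-width dtype falls back to BytesCodec. Below is a small sketch of that selection, for illustration only; the helper is private, and the exact NumPy spellings that resolve to the string/bytes data types depend on this branch's DataType.from_numpy.

import numpy as np
from zarr.codecs import BytesCodec, _get_default_array_bytes_codec

# A fixed-width numeric dtype is neither DataType.string nor DataType.bytes,
# so the plain BytesCodec is returned.
codec = _get_default_array_bytes_codec(np.dtype("int32"))
assert isinstance(codec, BytesCodec)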
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py new file mode 100644 index 0000000000..0f50264be8 --- /dev/null +++ b/src/zarr/codecs/_v2.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import numcodecs +from numcodecs.compat import ensure_bytes, ensure_ndarray + +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec +from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype +from zarr.registry import get_ndbuffer_class + +if TYPE_CHECKING: + import numcodecs.abc + + from zarr.core.array_spec import ArraySpec + + +@dataclass(frozen=True) +class V2Compressor(ArrayBytesCodec): + compressor: numcodecs.abc.Codec | None + + is_fixed_size = False + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + if self.compressor is not None: + chunk_numpy_array = ensure_ndarray( + await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like()) + ) + else: + chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like()) + + # ensure correct dtype + if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject: + chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype) + + return get_ndbuffer_class().from_numpy_array(chunk_numpy_array) + + async def _encode_single( + self, + chunk_array: NDBuffer, + _chunk_spec: ArraySpec, + ) -> Buffer | None: + chunk_numpy_array = chunk_array.as_numpy_array() + if self.compressor is not None: + if ( + not chunk_numpy_array.flags.c_contiguous + and not chunk_numpy_array.flags.f_contiguous + ): + chunk_numpy_array = chunk_numpy_array.copy(order="A") + encoded_chunk_bytes = ensure_bytes( + await asyncio.to_thread(self.compressor.encode, chunk_numpy_array) + ) + else: + encoded_chunk_bytes = ensure_bytes(chunk_numpy_array) + + return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes) + + def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: + raise NotImplementedError + + +@dataclass(frozen=True) +class V2Filters(ArrayArrayCodec): + filters: tuple[numcodecs.abc.Codec, ...] 
| None + + is_fixed_size = False + + async def _decode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + chunk_ndarray = chunk_array.as_ndarray_like() + # apply filters in reverse order + if self.filters is not None: + for filter in self.filters[::-1]: + chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray) + + # ensure correct chunk shape + if chunk_ndarray.shape != chunk_spec.shape: + chunk_ndarray = chunk_ndarray.reshape( + chunk_spec.shape, + order=chunk_spec.order, + ) + + return get_ndbuffer_class().from_ndarray_like(chunk_ndarray) + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> NDBuffer | None: + chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order) + + if self.filters is not None: + for filter in self.filters: + chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray) + + return get_ndbuffer_class().from_ndarray_like(chunk_ndarray) + + def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: + raise NotImplementedError diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py new file mode 100644 index 0000000000..54a23c9c57 --- /dev/null +++ b/src/zarr/codecs/blosc.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass, replace +from enum import Enum +from functools import cached_property +from typing import TYPE_CHECKING + +import numcodecs +from numcodecs.blosc import Blosc + +from zarr.abc.codec import BytesBytesCodec +from zarr.core.buffer.cpu import as_numpy_array_wrapper +from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer + + +class BloscShuffle(Enum): + """ + Enum for shuffle filter used by blosc. + """ + + noshuffle = "noshuffle" + shuffle = "shuffle" + bitshuffle = "bitshuffle" + + @classmethod + def from_int(cls, num: int) -> BloscShuffle: + blosc_shuffle_int_to_str = { + 0: "noshuffle", + 1: "shuffle", + 2: "bitshuffle", + } + if num not in blosc_shuffle_int_to_str: + raise ValueError(f"Value must be between 0 and 2. Got {num}.") + return BloscShuffle[blosc_shuffle_int_to_str[num]] + + +class BloscCname(Enum): + """ + Enum for compression library used by blosc. + """ + + lz4 = "lz4" + lz4hc = "lz4hc" + blosclz = "blosclz" + zstd = "zstd" + snappy = "snappy" + zlib = "zlib" + + +# See https://zarr.readthedocs.io/en/stable/tutorial.html#configuring-blosc +numcodecs.blosc.use_threads = False + + +def parse_typesize(data: JSON) -> int: + if isinstance(data, int): + if data > 0: + return data + else: + raise ValueError( + f"Value must be greater than 0. Got {data}, which is less or equal to 0." + ) + raise TypeError(f"Value must be an int. Got {type(data)} instead.") + + +# todo: real validation +def parse_clevel(data: JSON) -> int: + if isinstance(data, int): + return data + raise TypeError(f"Value should be an int. Got {type(data)} instead.") + + +def parse_blocksize(data: JSON) -> int: + if isinstance(data, int): + return data + raise TypeError(f"Value should be an int. 
Got {type(data)} instead.") + + +@dataclass(frozen=True) +class BloscCodec(BytesBytesCodec): + is_fixed_size = False + + typesize: int | None + cname: BloscCname = BloscCname.zstd + clevel: int = 5 + shuffle: BloscShuffle | None = BloscShuffle.noshuffle + blocksize: int = 0 + + def __init__( + self, + *, + typesize: int | None = None, + cname: BloscCname | str = BloscCname.zstd, + clevel: int = 5, + shuffle: BloscShuffle | str | None = None, + blocksize: int = 0, + ) -> None: + typesize_parsed = parse_typesize(typesize) if typesize is not None else None + cname_parsed = parse_enum(cname, BloscCname) + clevel_parsed = parse_clevel(clevel) + shuffle_parsed = parse_enum(shuffle, BloscShuffle) if shuffle is not None else None + blocksize_parsed = parse_blocksize(blocksize) + + object.__setattr__(self, "typesize", typesize_parsed) + object.__setattr__(self, "cname", cname_parsed) + object.__setattr__(self, "clevel", clevel_parsed) + object.__setattr__(self, "shuffle", shuffle_parsed) + object.__setattr__(self, "blocksize", blocksize_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "blosc") + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + if self.typesize is None: + raise ValueError("`typesize` needs to be set for serialization.") + if self.shuffle is None: + raise ValueError("`shuffle` needs to be set for serialization.") + return { + "name": "blosc", + "configuration": { + "typesize": self.typesize, + "cname": self.cname.value, + "clevel": self.clevel, + "shuffle": self.shuffle.value, + "blocksize": self.blocksize, + }, + } + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + dtype = array_spec.dtype + new_codec = self + if new_codec.typesize is None: + new_codec = replace(new_codec, typesize=dtype.itemsize) + if new_codec.shuffle is None: + new_codec = replace( + new_codec, + shuffle=(BloscShuffle.bitshuffle if dtype.itemsize == 1 else BloscShuffle.shuffle), + ) + + return new_codec + + @cached_property + def _blosc_codec(self) -> Blosc: + if self.shuffle is None: + raise ValueError("`shuffle` needs to be set for decoding and encoding.") + map_shuffle_str_to_int = { + BloscShuffle.noshuffle: 0, + BloscShuffle.shuffle: 1, + BloscShuffle.bitshuffle: 2, + } + config_dict = { + "cname": self.cname.name, + "clevel": self.clevel, + "shuffle": map_shuffle_str_to_int[self.shuffle], + "blocksize": self.blocksize, + } + return Blosc.from_config(config_dict) + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer: + return await asyncio.to_thread( + as_numpy_array_wrapper, self._blosc_codec.decode, chunk_bytes, chunk_spec.prototype + ) + + async def _encode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + # Since blosc only support host memory, we convert the input and output of the encoding + # between numpy array and buffer + return await asyncio.to_thread( + lambda chunk: chunk_spec.prototype.buffer.from_bytes( + self._blosc_codec.encode(chunk.as_numpy_array()) + ), + chunk_bytes, + ) + + def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: + raise NotImplementedError + + +register_codec("blosc", BloscCodec) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py new file mode 100644 index 0000000000..78c7b22fbc --- /dev/null +++ b/src/zarr/codecs/bytes.py @@ -0,0 +1,127 @@ +from __future__ import annotations + 
+import sys +from dataclasses import dataclass, replace +from enum import Enum +from typing import TYPE_CHECKING + +import numpy as np + +from zarr.abc.codec import ArrayBytesCodec +from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer +from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + + +class Endian(Enum): + """ + Enum for endian type used by bytes codec. + """ + + big = "big" + little = "little" + + +default_system_endian = Endian(sys.byteorder) + + +@dataclass(frozen=True) +class BytesCodec(ArrayBytesCodec): + is_fixed_size = True + + endian: Endian | None + + def __init__(self, *, endian: Endian | str | None = default_system_endian) -> None: + endian_parsed = None if endian is None else parse_enum(endian, Endian) + + object.__setattr__(self, "endian", endian_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration( + data, "bytes", require_configuration=False + ) + configuration_parsed = configuration_parsed or {} + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + if self.endian is None: + return {"name": "bytes"} + else: + return {"name": "bytes", "configuration": {"endian": self.endian.value}} + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + if array_spec.dtype.itemsize == 0: + if self.endian is not None: + return replace(self, endian=None) + elif self.endian is None: + raise ValueError( + "The `endian` configuration needs to be specified for multi-byte data types." + ) + return self + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + assert isinstance(chunk_bytes, Buffer) + if chunk_spec.dtype.itemsize > 0: + if self.endian == Endian.little: + prefix = "<" + else: + prefix = ">" + dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}") + else: + dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}") + + as_array_like = chunk_bytes.as_array_like() + if isinstance(as_array_like, NDArrayLike): + as_nd_array_like = as_array_like + else: + as_nd_array_like = np.asanyarray(as_array_like) + chunk_array = chunk_spec.prototype.nd_buffer.from_ndarray_like( + as_nd_array_like.view(dtype=dtype) + ) + + # ensure correct chunk shape + if chunk_array.shape != chunk_spec.shape: + chunk_array = chunk_array.reshape( + chunk_spec.shape, + ) + return chunk_array + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + assert isinstance(chunk_array, NDBuffer) + if ( + chunk_array.dtype.itemsize > 1 + and self.endian is not None + and self.endian != chunk_array.byteorder + ): + # type-ignore is a numpy bug + # see https://github.com/numpy/numpy/issues/26473 + new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) # type: ignore[arg-type] + chunk_array = chunk_array.astype(new_dtype) + + nd_array = chunk_array.as_ndarray_like() + # Flatten the nd-array (only copy if needed) and reinterpret as bytes + nd_array = nd_array.ravel().view(dtype="b") + return chunk_spec.prototype.buffer.from_array_like(nd_array) + + def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: + return input_byte_length + + +register_codec("bytes", BytesCodec) + +# compatibility with earlier versions of ZEP1 +register_codec("endian", BytesCodec) diff --git a/src/zarr/codecs/crc32c_.py 
b/src/zarr/codecs/crc32c_.py new file mode 100644 index 0000000000..3a6624ad25 --- /dev/null +++ b/src/zarr/codecs/crc32c_.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, cast + +import numpy as np +import typing_extensions +from crc32c import crc32c + +from zarr.abc.codec import BytesBytesCodec +from zarr.core.common import JSON, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer + + +@dataclass(frozen=True) +class Crc32cCodec(BytesBytesCodec): + is_fixed_size = True + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + parse_named_configuration(data, "crc32c", require_configuration=False) + return cls() + + def to_dict(self) -> dict[str, JSON]: + return {"name": "crc32c"} + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer: + data = chunk_bytes.as_numpy_array() + crc32_bytes = data[-4:] + inner_bytes = data[:-4] + + # Need to do a manual cast until https://github.com/numpy/numpy/issues/26783 is resolved + computed_checksum = np.uint32(crc32c(cast(typing_extensions.Buffer, inner_bytes))).tobytes() + stored_checksum = bytes(crc32_bytes) + if computed_checksum != stored_checksum: + raise ValueError( + f"Stored and computed checksum do not match. Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." + ) + return chunk_spec.prototype.buffer.from_array_like(inner_bytes) + + async def _encode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + data = chunk_bytes.as_numpy_array() + # Calculate the checksum and "cast" it to a numpy array + checksum = np.array([crc32c(cast(typing_extensions.Buffer, data))], dtype=np.uint32) + # Append the checksum (as bytes) to the data + return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("b"))) + + def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: + return input_byte_length + 4 + + +register_codec("crc32c", Crc32cCodec) diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py new file mode 100644 index 0000000000..c0ad5e1385 --- /dev/null +++ b/src/zarr/codecs/gzip.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from numcodecs.gzip import GZip + +from zarr.abc.codec import BytesBytesCodec +from zarr.core.buffer.cpu import as_numpy_array_wrapper +from zarr.core.common import JSON, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer + + +def parse_gzip_level(data: JSON) -> int: + if not isinstance(data, (int)): + raise TypeError(f"Expected int, got {type(data)}") + if data not in range(0, 10): + raise ValueError( + f"Expected an integer from the inclusive range (0, 9). Got {data} instead." 
+ ) + return data + + +@dataclass(frozen=True) +class GzipCodec(BytesBytesCodec): + is_fixed_size = False + + level: int = 5 + + def __init__(self, *, level: int = 5) -> None: + level_parsed = parse_gzip_level(level) + + object.__setattr__(self, "level", level_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "gzip") + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + return {"name": "gzip", "configuration": {"level": self.level}} + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer: + return await asyncio.to_thread( + as_numpy_array_wrapper, GZip(self.level).decode, chunk_bytes, chunk_spec.prototype + ) + + async def _encode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + return await asyncio.to_thread( + as_numpy_array_wrapper, GZip(self.level).encode, chunk_bytes, chunk_spec.prototype + ) + + def compute_encoded_size( + self, + _input_byte_length: int, + _chunk_spec: ArraySpec, + ) -> int: + raise NotImplementedError + + +register_codec("gzip", GzipCodec) diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py new file mode 100644 index 0000000000..1226a04f06 --- /dev/null +++ b/src/zarr/codecs/pipeline.py @@ -0,0 +1,527 @@ +from __future__ import annotations + +from dataclasses import dataclass +from itertools import islice, pairwise +from typing import TYPE_CHECKING, Any, TypeVar +from warnings import warn + +from zarr.abc.codec import ( + ArrayArrayCodec, + ArrayBytesCodec, + ArrayBytesCodecPartialDecodeMixin, + ArrayBytesCodecPartialEncodeMixin, + BytesBytesCodec, + Codec, + CodecPipeline, +) +from zarr.core.common import ChunkCoords, concurrent_map +from zarr.core.config import config +from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice +from zarr.core.metadata.v2 import _default_fill_value +from zarr.registry import register_pipeline + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + from typing import Self + + import numpy as np + + from zarr.abc.store import ByteGetter, ByteSetter + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer + from zarr.core.chunk_grids import ChunkGrid + +T = TypeVar("T") +U = TypeVar("U") + + +def _unzip2(iterable: Iterable[tuple[T, U]]) -> tuple[list[T], list[U]]: + out0: list[T] = [] + out1: list[U] = [] + for item0, item1 in iterable: + out0.append(item0) + out1.append(item1) + return (out0, out1) + + +def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]: + if n < 1: + raise ValueError("n must be at least one") + it = iter(iterable) + while batch := tuple(islice(it, n)): + yield batch + + +def resolve_batched(codec: Codec, chunk_specs: Iterable[ArraySpec]) -> Iterable[ArraySpec]: + return [codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] + + +@dataclass(frozen=True) +class BatchedCodecPipeline(CodecPipeline): + """Default codec pipeline. + + This batched codec pipeline divides the chunk batches into batches of a configurable + batch size ("mini-batch"). Fetching, decoding, encoding and storing are performed in + lock step for each mini-batch. Multiple mini-batches are processing concurrently. + """ + + array_array_codecs: tuple[ArrayArrayCodec, ...] + array_bytes_codec: ArrayBytesCodec + bytes_bytes_codecs: tuple[BytesBytesCodec, ...] 
+ batch_size: int + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + return type(self).from_codecs(c.evolve_from_array_spec(array_spec=array_spec) for c in self) + + @classmethod + def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self: + array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(codecs) + + return cls( + array_array_codecs=array_array_codecs, + array_bytes_codec=array_bytes_codec, + bytes_bytes_codecs=bytes_bytes_codecs, + batch_size=batch_size or config.get("codec_pipeline.batch_size"), + ) + + @property + def supports_partial_decode(self) -> bool: + """Determines whether the codec pipeline supports partial decoding. + + Currently, only codec pipelines with a single ArrayBytesCodec that supports + partial decoding can support partial decoding. This limitation is due to the fact + that ArrayArrayCodecs can change the slice selection leading to non-contiguous + slices and BytesBytesCodecs can change the chunk bytes in a way that slice + selections cannot be attributed to byte ranges anymore which renders partial + decoding infeasible. + + This limitation may softened in the future.""" + return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( + self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin + ) + + @property + def supports_partial_encode(self) -> bool: + """Determines whether the codec pipeline supports partial encoding. + + Currently, only codec pipelines with a single ArrayBytesCodec that supports + partial encoding can support partial encoding. This limitation is due to the fact + that ArrayArrayCodecs can change the slice selection leading to non-contiguous + slices and BytesBytesCodecs can change the chunk bytes in a way that slice + selections cannot be attributed to byte ranges anymore which renders partial + encoding infeasible. 
+ + This limitation may softened in the future.""" + return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( + self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin + ) + + def __iter__(self) -> Iterator[Codec]: + yield from self.array_array_codecs + yield self.array_bytes_codec + yield from self.bytes_bytes_codecs + + def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + for codec in self: + codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) + + def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: + for codec in self: + byte_length = codec.compute_encoded_size(byte_length, array_spec) + array_spec = codec.resolve_metadata(array_spec) + return byte_length + + def _codecs_with_resolved_metadata_batched( + self, chunk_specs: Iterable[ArraySpec] + ) -> tuple[ + list[tuple[ArrayArrayCodec, list[ArraySpec]]], + tuple[ArrayBytesCodec, list[ArraySpec]], + list[tuple[BytesBytesCodec, list[ArraySpec]]], + ]: + aa_codecs_with_spec: list[tuple[ArrayArrayCodec, list[ArraySpec]]] = [] + chunk_specs = list(chunk_specs) + for aa_codec in self.array_array_codecs: + aa_codecs_with_spec.append((aa_codec, chunk_specs)) + chunk_specs = [aa_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] + + ab_codec_with_spec = (self.array_bytes_codec, chunk_specs) + chunk_specs = [ + self.array_bytes_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs + ] + + bb_codecs_with_spec: list[tuple[BytesBytesCodec, list[ArraySpec]]] = [] + for bb_codec in self.bytes_bytes_codecs: + bb_codecs_with_spec.append((bb_codec, chunk_specs)) + chunk_specs = [bb_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] + + return (aa_codecs_with_spec, ab_codec_with_spec, bb_codecs_with_spec) + + async def decode_batch( + self, + chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + chunk_bytes_batch: Iterable[Buffer | None] + chunk_bytes_batch, chunk_specs = _unzip2(chunk_bytes_and_specs) + + ( + aa_codecs_with_spec, + ab_codec_with_spec, + bb_codecs_with_spec, + ) = self._codecs_with_resolved_metadata_batched(chunk_specs) + + for bb_codec, chunk_spec_batch in bb_codecs_with_spec[::-1]: + chunk_bytes_batch = await bb_codec.decode( + zip(chunk_bytes_batch, chunk_spec_batch, strict=False) + ) + + ab_codec, chunk_spec_batch = ab_codec_with_spec + chunk_array_batch = await ab_codec.decode( + zip(chunk_bytes_batch, chunk_spec_batch, strict=False) + ) + + for aa_codec, chunk_spec_batch in aa_codecs_with_spec[::-1]: + chunk_array_batch = await aa_codec.decode( + zip(chunk_array_batch, chunk_spec_batch, strict=False) + ) + + return chunk_array_batch + + async def decode_partial_batch( + self, + batch_info: Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + assert self.supports_partial_decode + assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) + return await self.array_bytes_codec.decode_partial(batch_info) + + async def encode_batch( + self, + chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], + ) -> Iterable[Buffer | None]: + chunk_array_batch: Iterable[NDBuffer | None] + chunk_specs: Iterable[ArraySpec] + chunk_array_batch, chunk_specs = _unzip2(chunk_arrays_and_specs) + + for aa_codec in self.array_array_codecs: + chunk_array_batch = await aa_codec.encode( + zip(chunk_array_batch, chunk_specs, strict=False) + ) + chunk_specs = resolve_batched(aa_codec, 
chunk_specs) + + chunk_bytes_batch = await self.array_bytes_codec.encode( + zip(chunk_array_batch, chunk_specs, strict=False) + ) + chunk_specs = resolve_batched(self.array_bytes_codec, chunk_specs) + + for bb_codec in self.bytes_bytes_codecs: + chunk_bytes_batch = await bb_codec.encode( + zip(chunk_bytes_batch, chunk_specs, strict=False) + ) + chunk_specs = resolve_batched(bb_codec, chunk_specs) + + return chunk_bytes_batch + + async def encode_partial_batch( + self, + batch_info: Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]], + ) -> None: + assert self.supports_partial_encode + assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin) + await self.array_bytes_codec.encode_partial(batch_info) + + async def read_batch( + self, + batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]], + out: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + if self.supports_partial_decode: + chunk_array_batch = await self.decode_partial_batch( + [ + (byte_getter, chunk_selection, chunk_spec) + for byte_getter, chunk_spec, chunk_selection, _ in batch_info + ] + ) + for chunk_array, (_, chunk_spec, _, out_selection) in zip( + chunk_array_batch, batch_info, strict=False + ): + if chunk_array is not None: + out[out_selection] = chunk_array + else: + fill_value = chunk_spec.fill_value + + if fill_value is None: + # Zarr V2 allowed `fill_value` to be null in the metadata. + # Zarr V3 requires it to be set. This has already been + # validated when decoding the metadata, but we support reading + # Zarr V2 data and need to support the case where fill_value + # is None. + fill_value = _default_fill_value(dtype=chunk_spec.dtype) + + out[out_selection] = fill_value + else: + chunk_bytes_batch = await concurrent_map( + [ + (byte_getter, array_spec.prototype) + for byte_getter, array_spec, _, _ in batch_info + ], + lambda byte_getter, prototype: byte_getter.get(prototype), + config.get("async.concurrency"), + ) + chunk_array_batch = await self.decode_batch( + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) + ], + ) + for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip( + chunk_array_batch, batch_info, strict=False + ): + if chunk_array is not None: + tmp = chunk_array[chunk_selection] + if drop_axes != (): + tmp = tmp.squeeze(axis=drop_axes) + out[out_selection] = tmp + else: + fill_value = chunk_spec.fill_value + if fill_value is None: + fill_value = _default_fill_value(dtype=chunk_spec.dtype) + out[out_selection] = fill_value + + def _merge_chunk_array( + self, + existing_chunk_array: NDBuffer | None, + value: NDBuffer, + out_selection: SelectorTuple, + chunk_spec: ArraySpec, + chunk_selection: SelectorTuple, + drop_axes: tuple[int, ...], + ) -> NDBuffer: + if is_total_slice(chunk_selection, chunk_spec.shape) and value.shape == chunk_spec.shape: + return value + if existing_chunk_array is None: + chunk_array = chunk_spec.prototype.nd_buffer.create( + shape=chunk_spec.shape, + dtype=chunk_spec.dtype, + order=chunk_spec.order, + fill_value=chunk_spec.fill_value, + ) + else: + chunk_array = existing_chunk_array.copy() # make a writable copy + if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype): + chunk_value = value + else: + chunk_value = value[out_selection] + # handle missing singleton dimensions + if drop_axes != (): + item = tuple( + None # equivalent to np.newaxis + if idx in drop_axes + else slice(None) + for idx in 
range(chunk_spec.ndim) + ) + chunk_value = chunk_value[item] + chunk_array[chunk_selection] = chunk_value + return chunk_array + + async def write_batch( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + if self.supports_partial_encode: + await self.encode_partial_batch( + [ + (byte_setter, value[out_selection], chunk_selection, chunk_spec) + for byte_setter, chunk_spec, chunk_selection, out_selection in batch_info + ], + ) + + else: + # Read existing bytes if not total slice + async def _read_key( + byte_setter: ByteSetter | None, prototype: BufferPrototype + ) -> Buffer | None: + if byte_setter is None: + return None + return await byte_setter.get(prototype=prototype) + + chunk_bytes_batch: Iterable[Buffer | None] + chunk_bytes_batch = await concurrent_map( + [ + ( + None if is_total_slice(chunk_selection, chunk_spec.shape) else byte_setter, + chunk_spec.prototype, + ) + for byte_setter, chunk_spec, chunk_selection, _ in batch_info + ], + _read_key, + config.get("async.concurrency"), + ) + chunk_array_batch = await self.decode_batch( + [ + (chunk_bytes, chunk_spec) + for chunk_bytes, (_, chunk_spec, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) + ], + ) + + chunk_array_batch = [ + self._merge_chunk_array( + chunk_array, value, out_selection, chunk_spec, chunk_selection, drop_axes + ) + for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip( + chunk_array_batch, batch_info, strict=False + ) + ] + + chunk_array_batch = [ + None + if chunk_array is None or chunk_array.all_equal(chunk_spec.fill_value) + else chunk_array + for chunk_array, (_, chunk_spec, _, _) in zip( + chunk_array_batch, batch_info, strict=False + ) + ] + + chunk_bytes_batch = await self.encode_batch( + [ + (chunk_array, chunk_spec) + for chunk_array, (_, chunk_spec, _, _) in zip( + chunk_array_batch, batch_info, strict=False + ) + ], + ) + + async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> None: + if chunk_bytes is None: + await byte_setter.delete() + else: + await byte_setter.set(chunk_bytes) + + await concurrent_map( + [ + (byte_setter, chunk_bytes) + for chunk_bytes, (byte_setter, _, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) + ], + _write_key, + config.get("async.concurrency"), + ) + + async def decode( + self, + chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], + ) -> Iterable[NDBuffer | None]: + output: list[NDBuffer | None] = [] + for batch_info in batched(chunk_bytes_and_specs, self.batch_size): + output.extend(await self.decode_batch(batch_info)) + return output + + async def encode( + self, + chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], + ) -> Iterable[Buffer | None]: + output: list[Buffer | None] = [] + for single_batch_info in batched(chunk_arrays_and_specs, self.batch_size): + output.extend(await self.encode_batch(single_batch_info)) + return output + + async def read( + self, + batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]], + out: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + await concurrent_map( + [ + (single_batch_info, out, drop_axes) + for single_batch_info in batched(batch_info, self.batch_size) + ], + self.read_batch, + config.get("async.concurrency"), + ) + + async def write( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]], + value: NDBuffer, + drop_axes: tuple[int, ...] 
= (), + ) -> None: + await concurrent_map( + [ + (single_batch_info, value, drop_axes) + for single_batch_info in batched(batch_info, self.batch_size) + ], + self.write_batch, + config.get("async.concurrency"), + ) + + +def codecs_from_list( + codecs: Iterable[Codec], +) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: + from zarr.codecs.sharding import ShardingCodec + + array_array: tuple[ArrayArrayCodec, ...] = () + array_bytes_maybe: ArrayBytesCodec | None = None + bytes_bytes: tuple[BytesBytesCodec, ...] = () + + if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(tuple(codecs)) > 1: + warn( + "Combining a `sharding_indexed` codec disables partial reads and " + "writes, which may lead to inefficient performance.", + stacklevel=3, + ) + + for prev_codec, cur_codec in pairwise((None, *codecs)): + if isinstance(cur_codec, ArrayArrayCodec): + if isinstance(prev_codec, ArrayBytesCodec | BytesBytesCodec): + msg = ( + f"Invalid codec order. ArrayArrayCodec {cur_codec}" + "must be preceded by another ArrayArrayCodec. " + f"Got {type(prev_codec)} instead." + ) + raise TypeError(msg) + array_array += (cur_codec,) + + elif isinstance(cur_codec, ArrayBytesCodec): + if isinstance(prev_codec, BytesBytesCodec): + msg = ( + f"Invalid codec order. ArrayBytes codec {cur_codec}" + f" must be preceded by an ArrayArrayCodec. Got {type(prev_codec)} instead." + ) + raise TypeError(msg) + + if array_bytes_maybe is not None: + msg = ( + f"Got two instances of ArrayBytesCodec: {array_bytes_maybe} and {cur_codec}. " + "Only one array-to-bytes codec is allowed." + ) + raise ValueError(msg) + + array_bytes_maybe = cur_codec + + elif isinstance(cur_codec, BytesBytesCodec): + if isinstance(prev_codec, ArrayArrayCodec): + msg = ( + f"Invalid codec order. BytesBytesCodec {cur_codec}" + "must be preceded by either another BytesBytesCodec, or an ArrayBytesCodec. " + f"Got {type(prev_codec)} instead." 
+ ) + bytes_bytes += (cur_codec,) + else: + raise TypeError + + if array_bytes_maybe is None: + raise ValueError("Required ArrayBytesCodec was not found.") + else: + return array_array, array_bytes_maybe, bytes_bytes + + +register_pipeline(BatchedCodecPipeline) diff --git a/docs/_static/donotdelete b/src/zarr/codecs/registry.py similarity index 100% rename from docs/_static/donotdelete rename to src/zarr/codecs/registry.py diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py new file mode 100644 index 0000000000..2181e9eb76 --- /dev/null +++ b/src/zarr/codecs/sharding.py @@ -0,0 +1,729 @@ +from __future__ import annotations + +from collections.abc import Iterable, Mapping, MutableMapping +from dataclasses import dataclass, field, replace +from enum import Enum +from functools import lru_cache +from operator import itemgetter +from typing import TYPE_CHECKING, Any, NamedTuple, cast + +import numpy as np +import numpy.typing as npt + +from zarr.abc.codec import ( + ArrayBytesCodec, + ArrayBytesCodecPartialDecodeMixin, + ArrayBytesCodecPartialEncodeMixin, + Codec, + CodecPipeline, +) +from zarr.abc.store import ByteGetter, ByteRangeRequest, ByteSetter +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.core.array_spec import ArraySpec +from zarr.core.buffer import ( + Buffer, + BufferPrototype, + NDBuffer, + default_buffer_prototype, + numpy_buffer_prototype, +) +from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid +from zarr.core.common import ( + ChunkCoords, + ChunkCoordsLike, + parse_enum, + parse_named_configuration, + parse_shapelike, + product, +) +from zarr.core.indexing import ( + BasicIndexer, + SelectorTuple, + c_order_iter, + get_indexer, + morton_order_iter, +) +from zarr.core.metadata.v3 import parse_codecs +from zarr.registry import get_ndbuffer_class, get_pipeline_class, register_codec + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Iterator + from typing import Self + + from zarr.core.common import JSON + +MAX_UINT_64 = 2**64 - 1 +ShardMapping = Mapping[ChunkCoords, Buffer] +ShardMutableMapping = MutableMapping[ChunkCoords, Buffer] + + +class ShardingCodecIndexLocation(Enum): + """ + Enum for index location used by the sharding codec. + """ + + start = "start" + end = "end" + + +def parse_index_location(data: object) -> ShardingCodecIndexLocation: + return parse_enum(data, ShardingCodecIndexLocation) + + +@dataclass(frozen=True) +class _ShardingByteGetter(ByteGetter): + shard_dict: ShardMapping + chunk_coords: ChunkCoords + + async def get( + self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None + ) -> Buffer | None: + assert byte_range is None, "byte_range is not supported within shards" + assert ( + prototype == default_buffer_prototype() + ), f"prototype is not supported within shards currently. 
diff: {prototype} != {default_buffer_prototype()}" + return self.shard_dict.get(self.chunk_coords) + + +@dataclass(frozen=True) +class _ShardingByteSetter(_ShardingByteGetter, ByteSetter): + shard_dict: ShardMutableMapping + + async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: + assert byte_range is None, "byte_range is not supported within shards" + self.shard_dict[self.chunk_coords] = value + + async def delete(self) -> None: + del self.shard_dict[self.chunk_coords] + + async def set_if_not_exists(self, default: Buffer) -> None: + self.shard_dict.setdefault(self.chunk_coords, default) + + +class _ShardIndex(NamedTuple): + # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2) + offsets_and_lengths: npt.NDArray[np.uint64] + + @property + def chunks_per_shard(self) -> ChunkCoords: + result = tuple(self.offsets_and_lengths.shape[0:-1]) + # The cast is required until https://github.com/numpy/numpy/pull/27211 is merged + return cast(ChunkCoords, result) + + def _localize_chunk(self, chunk_coords: ChunkCoords) -> ChunkCoords: + return tuple( + chunk_i % shard_i + for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape, strict=False) + ) + + def is_all_empty(self) -> bool: + return bool(np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)) + + def get_full_chunk_map(self) -> npt.NDArray[np.bool_]: + return np.not_equal(self.offsets_and_lengths[..., 0], MAX_UINT_64) + + def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None: + localized_chunk = self._localize_chunk(chunk_coords) + chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk] + if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64): + return None + else: + return (int(chunk_start), int(chunk_start) + int(chunk_len)) + + def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None: + localized_chunk = self._localize_chunk(chunk_coords) + if chunk_slice is None: + self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) + else: + self.offsets_and_lengths[localized_chunk] = ( + chunk_slice.start, + chunk_slice.stop - chunk_slice.start, + ) + + def is_dense(self, chunk_byte_length: int) -> bool: + sorted_offsets_and_lengths = sorted( + [ + (offset, length) + for offset, length in self.offsets_and_lengths + if offset != MAX_UINT_64 + ], + key=itemgetter(0), + ) + + # Are all non-empty offsets unique? 
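+ # (and, in the return below, that every stored chunk occupies its own slot aligned to and exactly equal to chunk_byte_length)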
+ if len( + {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64} + ) != len(sorted_offsets_and_lengths): + return False + + return all( + offset % chunk_byte_length == 0 and length == chunk_byte_length + for offset, length in sorted_offsets_and_lengths + ) + + @classmethod + def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardIndex: + offsets_and_lengths = np.zeros(chunks_per_shard + (2,), dtype=" _ShardReader: + shard_index_size = codec._shard_index_size(chunks_per_shard) + obj = cls() + obj.buf = buf + if codec.index_location == ShardingCodecIndexLocation.start: + shard_index_bytes = obj.buf[:shard_index_size] + else: + shard_index_bytes = obj.buf[-shard_index_size:] + + obj.index = await codec._decode_shard_index(shard_index_bytes, chunks_per_shard) + return obj + + @classmethod + def create_empty( + cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None + ) -> _ShardReader: + if buffer_prototype is None: + buffer_prototype = default_buffer_prototype() + index = _ShardIndex.create_empty(chunks_per_shard) + obj = cls() + obj.buf = buffer_prototype.buffer.create_zero_length() + obj.index = index + return obj + + def __getitem__(self, chunk_coords: ChunkCoords) -> Buffer: + chunk_byte_slice = self.index.get_chunk_slice(chunk_coords) + if chunk_byte_slice: + return self.buf[chunk_byte_slice[0] : chunk_byte_slice[1]] + raise KeyError + + def __len__(self) -> int: + return int(self.index.offsets_and_lengths.size / 2) + + def __iter__(self) -> Iterator[ChunkCoords]: + return c_order_iter(self.index.offsets_and_lengths.shape[:-1]) + + def is_empty(self) -> bool: + return self.index.is_all_empty() + + +class _ShardBuilder(_ShardReader, ShardMutableMapping): + buf: Buffer + index: _ShardIndex + + @classmethod + def merge_with_morton_order( + cls, + chunks_per_shard: ChunkCoords, + tombstones: set[ChunkCoords], + *shard_dicts: ShardMapping, + ) -> _ShardBuilder: + obj = cls.create_empty(chunks_per_shard) + for chunk_coords in morton_order_iter(chunks_per_shard): + if chunk_coords in tombstones: + continue + for shard_dict in shard_dicts: + maybe_value = shard_dict.get(chunk_coords, None) + if maybe_value is not None: + obj[chunk_coords] = maybe_value + break + return obj + + @classmethod + def create_empty( + cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None + ) -> _ShardBuilder: + if buffer_prototype is None: + buffer_prototype = default_buffer_prototype() + obj = cls() + obj.buf = buffer_prototype.buffer.create_zero_length() + obj.index = _ShardIndex.create_empty(chunks_per_shard) + return obj + + def __setitem__(self, chunk_coords: ChunkCoords, value: Buffer) -> None: + chunk_start = len(self.buf) + chunk_length = len(value) + self.buf = self.buf + value + self.index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) + + def __delitem__(self, chunk_coords: ChunkCoords) -> None: + raise NotImplementedError + + async def finalize( + self, + index_location: ShardingCodecIndexLocation, + index_encoder: Callable[[_ShardIndex], Awaitable[Buffer]], + ) -> Buffer: + index_bytes = await index_encoder(self.index) + if index_location == ShardingCodecIndexLocation.start: + self.index.offsets_and_lengths[..., 0] += len(index_bytes) + index_bytes = await index_encoder(self.index) # encode again with corrected offsets + out_buf = index_bytes + self.buf + else: + out_buf = self.buf + index_bytes + return out_buf + + +@dataclass(frozen=True) +class 
_MergingShardBuilder(ShardMutableMapping): + old_dict: _ShardReader + new_dict: _ShardBuilder + tombstones: set[ChunkCoords] = field(default_factory=set) + + def __getitem__(self, chunk_coords: ChunkCoords) -> Buffer: + chunk_bytes_maybe = self.new_dict.get(chunk_coords) + if chunk_bytes_maybe is not None: + return chunk_bytes_maybe + return self.old_dict[chunk_coords] + + def __setitem__(self, chunk_coords: ChunkCoords, value: Buffer) -> None: + self.new_dict[chunk_coords] = value + + def __delitem__(self, chunk_coords: ChunkCoords) -> None: + self.tombstones.add(chunk_coords) + + def __len__(self) -> int: + return self.old_dict.__len__() + + def __iter__(self) -> Iterator[ChunkCoords]: + return self.old_dict.__iter__() + + def is_empty(self) -> bool: + full_chunk_coords_map = self.old_dict.index.get_full_chunk_map() + full_chunk_coords_map = np.logical_or( + full_chunk_coords_map, self.new_dict.index.get_full_chunk_map() + ) + for tombstone in self.tombstones: + full_chunk_coords_map[tombstone] = False + return bool(np.array_equiv(full_chunk_coords_map, False)) + + async def finalize( + self, + index_location: ShardingCodecIndexLocation, + index_encoder: Callable[[_ShardIndex], Awaitable[Buffer]], + ) -> Buffer: + shard_builder = _ShardBuilder.merge_with_morton_order( + self.new_dict.index.chunks_per_shard, + self.tombstones, + self.new_dict, + self.old_dict, + ) + return await shard_builder.finalize(index_location, index_encoder) + + +@dataclass(frozen=True) +class ShardingCodec( + ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin +): + chunk_shape: ChunkCoords + codecs: tuple[Codec, ...] + index_codecs: tuple[Codec, ...] + index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end + + def __init__( + self, + *, + chunk_shape: ChunkCoordsLike, + codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), + index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), + index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, + ) -> None: + chunk_shape_parsed = parse_shapelike(chunk_shape) + codecs_parsed = parse_codecs(codecs) + index_codecs_parsed = parse_codecs(index_codecs) + index_location_parsed = parse_index_location(index_location) + + object.__setattr__(self, "chunk_shape", chunk_shape_parsed) + object.__setattr__(self, "codecs", codecs_parsed) + object.__setattr__(self, "index_codecs", index_codecs_parsed) + object.__setattr__(self, "index_location", index_location_parsed) + + # Use instance-local lru_cache to avoid memory leaks + object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + + # todo: typedict return type + def __getstate__(self) -> dict[str, Any]: + return self.to_dict() + + def __setstate__(self, state: dict[str, Any]) -> None: + config = state["configuration"] + object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"])) + object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) + object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) + object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + + # Use instance-local lru_cache to avoid memory leaks + object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + object.__setattr__(self, 
"_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "sharding_indexed") + return cls(**configuration_parsed) # type: ignore[arg-type] + + @property + def codec_pipeline(self) -> CodecPipeline: + return get_pipeline_class().from_codecs(self.codecs) + + def to_dict(self) -> dict[str, JSON]: + return { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": self.chunk_shape, + "codecs": tuple(s.to_dict() for s in self.codecs), + "index_codecs": tuple(s.to_dict() for s in self.index_codecs), + "index_location": self.index_location.value, + }, + } + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + shard_spec = self._get_chunk_spec(array_spec) + evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs) + if evolved_codecs != self.codecs: + return replace(self, codecs=evolved_codecs) + return self + + def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + if len(self.chunk_shape) != len(shape): + raise ValueError( + "The shard's `chunk_shape` and array's `shape` need to have the same number of dimensions." + ) + if not isinstance(chunk_grid, RegularChunkGrid): + raise TypeError("Sharding is only compatible with regular chunk grids.") + if not all( + s % c == 0 + for s, c in zip( + chunk_grid.chunk_shape, + self.chunk_shape, + strict=False, + ) + ): + raise ValueError( + "The array's `chunk_shape` needs to be divisible by the shard's inner `chunk_shape`." + ) + + async def _decode_single( + self, + shard_bytes: Buffer, + shard_spec: ArraySpec, + ) -> NDBuffer: + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + + indexer = BasicIndexer( + tuple(slice(0, s) for s in shard_shape), + shape=shard_shape, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + ) + + # setup output array + out = chunk_spec.prototype.nd_buffer.create( + shape=shard_shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 + ) + shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) + + if shard_dict.index.is_all_empty(): + out.fill(shard_spec.fill_value) + return out + + # decoding chunks and writing them into the output buffer + await self.codec_pipeline.read( + [ + ( + _ShardingByteGetter(shard_dict, chunk_coords), + chunk_spec, + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + out, + ) + + return out + + async def _decode_partial_single( + self, + byte_getter: ByteGetter, + selection: SelectorTuple, + shard_spec: ArraySpec, + ) -> NDBuffer | None: + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + + indexer = get_indexer( + selection, + shape=shard_shape, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + ) + + # setup output array + out = shard_spec.prototype.nd_buffer.create( + shape=indexer.shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 + ) + + indexed_chunks = list(indexer) + all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks} + + # reading bytes of all requested chunks + shard_dict: 
ShardMapping = {} + if self._is_total_shard(all_chunk_coords, chunks_per_shard): + # read entire shard + shard_dict_maybe = await self._load_full_shard_maybe( + byte_getter=byte_getter, + prototype=chunk_spec.prototype, + chunks_per_shard=chunks_per_shard, + ) + if shard_dict_maybe is None: + return None + shard_dict = shard_dict_maybe + else: + # read some chunks within the shard + shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard) + if shard_index is None: + return None + shard_dict = {} + for chunk_coords in all_chunk_coords: + chunk_byte_slice = shard_index.get_chunk_slice(chunk_coords) + if chunk_byte_slice: + chunk_bytes = await byte_getter.get( + prototype=chunk_spec.prototype, byte_range=chunk_byte_slice + ) + if chunk_bytes: + shard_dict[chunk_coords] = chunk_bytes + + # decoding chunks and writing them into the output buffer + await self.codec_pipeline.read( + [ + ( + _ShardingByteGetter(shard_dict, chunk_coords), + chunk_spec, + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + out, + ) + return out + + async def _encode_single( + self, + shard_array: NDBuffer, + shard_spec: ArraySpec, + ) -> Buffer | None: + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + + indexer = list( + BasicIndexer( + tuple(slice(0, s) for s in shard_shape), + shape=shard_shape, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + ) + ) + + shard_builder = _ShardBuilder.create_empty(chunks_per_shard) + + await self.codec_pipeline.write( + [ + ( + _ShardingByteSetter(shard_builder, chunk_coords), + chunk_spec, + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + shard_array, + ) + + return await shard_builder.finalize(self.index_location, self._encode_shard_index) + + async def _encode_partial_single( + self, + byte_setter: ByteSetter, + shard_array: NDBuffer, + selection: SelectorTuple, + shard_spec: ArraySpec, + ) -> None: + shard_shape = shard_spec.shape + chunk_shape = self.chunk_shape + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + chunk_spec = self._get_chunk_spec(shard_spec) + + shard_dict = _MergingShardBuilder( + await self._load_full_shard_maybe( + byte_getter=byte_setter, + prototype=chunk_spec.prototype, + chunks_per_shard=chunks_per_shard, + ) + or _ShardReader.create_empty(chunks_per_shard), + _ShardBuilder.create_empty(chunks_per_shard), + ) + + indexer = list( + get_indexer( + selection, shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape) + ) + ) + + await self.codec_pipeline.write( + [ + ( + _ShardingByteSetter(shard_dict, chunk_coords), + chunk_spec, + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + shard_array, + ) + + if shard_dict.is_empty(): + await byte_setter.delete() + else: + await byte_setter.set( + await shard_dict.finalize( + self.index_location, + self._encode_shard_index, + ) + ) + + def _is_total_shard( + self, all_chunk_coords: set[ChunkCoords], chunks_per_shard: ChunkCoords + ) -> bool: + return len(all_chunk_coords) == product(chunks_per_shard) and all( + chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard) + ) + + async def _decode_shard_index( + self, index_bytes: Buffer, chunks_per_shard: ChunkCoords + ) -> _ShardIndex: + index_array = next( + iter( + await get_pipeline_class() + 
.from_codecs(self.index_codecs) + .decode( + [(index_bytes, self._get_index_chunk_spec(chunks_per_shard))], + ) + ) + ) + assert index_array is not None + return _ShardIndex(index_array.as_numpy_array()) + + async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: + index_bytes = next( + iter( + await get_pipeline_class() + .from_codecs(self.index_codecs) + .encode( + [ + ( + get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths), + self._get_index_chunk_spec(index.chunks_per_shard), + ) + ], + ) + ) + ) + assert index_bytes is not None + assert isinstance(index_bytes, Buffer) + return index_bytes + + def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int: + return ( + get_pipeline_class() + .from_codecs(self.index_codecs) + .compute_encoded_size( + 16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard) + ) + ) + + def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec: + return ArraySpec( + shape=chunks_per_shard + (2,), + dtype=np.dtype(" ArraySpec: + return ArraySpec( + shape=self.chunk_shape, + dtype=shard_spec.dtype, + fill_value=shard_spec.fill_value, + order=shard_spec.order, + prototype=shard_spec.prototype, + ) + + def _get_chunks_per_shard(self, shard_spec: ArraySpec) -> ChunkCoords: + return tuple( + s // c + for s, c in zip( + shard_spec.shape, + self.chunk_shape, + strict=False, + ) + ) + + async def _load_shard_index_maybe( + self, byte_getter: ByteGetter, chunks_per_shard: ChunkCoords + ) -> _ShardIndex | None: + shard_index_size = self._shard_index_size(chunks_per_shard) + if self.index_location == ShardingCodecIndexLocation.start: + index_bytes = await byte_getter.get( + prototype=numpy_buffer_prototype(), byte_range=(0, shard_index_size) + ) + else: + index_bytes = await byte_getter.get( + prototype=numpy_buffer_prototype(), byte_range=(-shard_index_size, None) + ) + if index_bytes is not None: + return await self._decode_shard_index(index_bytes, chunks_per_shard) + return None + + async def _load_shard_index( + self, byte_getter: ByteGetter, chunks_per_shard: ChunkCoords + ) -> _ShardIndex: + return ( + await self._load_shard_index_maybe(byte_getter, chunks_per_shard) + ) or _ShardIndex.create_empty(chunks_per_shard) + + async def _load_full_shard_maybe( + self, byte_getter: ByteGetter, prototype: BufferPrototype, chunks_per_shard: ChunkCoords + ) -> _ShardReader | None: + shard_bytes = await byte_getter.get(prototype=prototype) + + return ( + await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) + if shard_bytes + else None + ) + + def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int: + chunks_per_shard = self._get_chunks_per_shard(shard_spec) + return input_byte_length + self._shard_index_size(chunks_per_shard) + + +register_codec("sharding_indexed", ShardingCodec) diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py new file mode 100644 index 0000000000..3a471beaf5 --- /dev/null +++ b/src/zarr/codecs/transpose.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from collections.abc import Iterable +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, cast + +import numpy as np + +from zarr.abc.codec import ArrayArrayCodec +from zarr.core.array_spec import ArraySpec +from zarr.core.common import JSON, ChunkCoordsLike, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Any, Self + + from zarr.core.buffer import NDBuffer + from 
zarr.core.chunk_grids import ChunkGrid + + +def parse_transpose_order(data: JSON | Iterable[int]) -> tuple[int, ...]: + if not isinstance(data, Iterable): + raise TypeError(f"Expected an iterable. Got {data} instead.") + if not all(isinstance(a, int) for a in data): + raise TypeError(f"Expected an iterable of integers. Got {data} instead.") + return tuple(cast(Iterable[int], data)) + + +@dataclass(frozen=True) +class TransposeCodec(ArrayArrayCodec): + is_fixed_size = True + + order: tuple[int, ...] + + def __init__(self, *, order: ChunkCoordsLike) -> None: + order_parsed = parse_transpose_order(order) + + object.__setattr__(self, "order", order_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "transpose") + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + return {"name": "transpose", "configuration": {"order": tuple(self.order)}} + + def validate(self, shape: tuple[int, ...], dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + if len(self.order) != len(shape): + raise ValueError( + f"The `order` tuple needs have as many entries as there are dimensions in the array. Got {self.order}." + ) + if len(self.order) != len(set(self.order)): + raise ValueError( + f"There must not be duplicates in the `order` tuple. Got {self.order}." + ) + if not all(0 <= x < len(shape) for x in self.order): + raise ValueError( + f"All entries in the `order` tuple must be between 0 and the number of dimensions in the array. Got {self.order}." + ) + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + ndim = array_spec.ndim + if len(self.order) != ndim: + raise ValueError( + f"The `order` tuple needs have as many entries as there are dimensions in the array. Got {self.order}." + ) + if len(self.order) != len(set(self.order)): + raise ValueError( + f"There must not be duplicates in the `order` tuple. Got {self.order}." + ) + if not all(0 <= x < ndim for x in self.order): + raise ValueError( + f"All entries in the `order` tuple must be between 0 and the number of dimensions in the array. Got {self.order}." 
+ ) + order = tuple(self.order) + + if order != self.order: + return replace(self, order=order) + return self + + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: + return ArraySpec( + shape=tuple(chunk_spec.shape[self.order[i]] for i in range(chunk_spec.ndim)), + dtype=chunk_spec.dtype, + fill_value=chunk_spec.fill_value, + order=chunk_spec.order, + prototype=chunk_spec.prototype, + ) + + async def _decode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + inverse_order = np.argsort(self.order) + return chunk_array.transpose(inverse_order) + + async def _encode_single( + self, + chunk_array: NDBuffer, + _chunk_spec: ArraySpec, + ) -> NDBuffer | None: + return chunk_array.transpose(self.order) + + def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: + return input_byte_length + + +register_codec("transpose", TransposeCodec) diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py new file mode 100644 index 0000000000..43544e0809 --- /dev/null +++ b/src/zarr/codecs/vlen_utf8.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import numpy as np +from numcodecs.vlen import VLenBytes, VLenUTF8 + +from zarr.abc.codec import ArrayBytesCodec +from zarr.core.buffer import Buffer, NDBuffer +from zarr.core.common import JSON, parse_named_configuration +from zarr.core.strings import cast_to_string_dtype +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + + +# can use a global because there are no parameters +_vlen_utf8_codec = VLenUTF8() +_vlen_bytes_codec = VLenBytes() + + +@dataclass(frozen=True) +class VLenUTF8Codec(ArrayBytesCodec): + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration( + data, "vlen-utf8", require_configuration=False + ) + configuration_parsed = configuration_parsed or {} + return cls(**configuration_parsed) + + def to_dict(self) -> dict[str, JSON]: + return {"name": "vlen-utf8", "configuration": {}} + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + return self + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + assert isinstance(chunk_bytes, Buffer) + + raw_bytes = chunk_bytes.as_array_like() + decoded = _vlen_utf8_codec.decode(raw_bytes) + assert decoded.dtype == np.object_ + decoded.shape = chunk_spec.shape + # coming out of the code, we know this is safe, so don't issue a warning + as_string_dtype = cast_to_string_dtype(decoded, safe=True) + return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype) + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + assert isinstance(chunk_array, NDBuffer) + return chunk_spec.prototype.buffer.from_bytes( + _vlen_utf8_codec.encode(chunk_array.as_numpy_array()) + ) + + def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: + # what is input_byte_length for an object dtype? 
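+ # Variable-length data has no fixed per-element width, so the encoded size cannot be derived from the input byte length alone; hence the NotImplementedError below.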
+ raise NotImplementedError("compute_encoded_size is not implemented for VLen codecs") + + +@dataclass(frozen=True) +class VLenBytesCodec(ArrayBytesCodec): + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration( + data, "vlen-bytes", require_configuration=False + ) + configuration_parsed = configuration_parsed or {} + return cls(**configuration_parsed) + + def to_dict(self) -> dict[str, JSON]: + return {"name": "vlen-bytes", "configuration": {}} + + def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: + return self + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + assert isinstance(chunk_bytes, Buffer) + + raw_bytes = chunk_bytes.as_array_like() + decoded = _vlen_bytes_codec.decode(raw_bytes) + assert decoded.dtype == np.object_ + decoded.shape = chunk_spec.shape + return chunk_spec.prototype.nd_buffer.from_numpy_array(decoded) + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + assert isinstance(chunk_array, NDBuffer) + return chunk_spec.prototype.buffer.from_bytes( + _vlen_bytes_codec.encode(chunk_array.as_numpy_array()) + ) + + def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: + # what is input_byte_length for an object dtype? + raise NotImplementedError("compute_encoded_size is not implemented for VLen codecs") + + +register_codec("vlen-utf8", VLenUTF8Codec) +register_codec("vlen-bytes", VLenBytesCodec) diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py new file mode 100644 index 0000000000..949f762b20 --- /dev/null +++ b/src/zarr/codecs/zstd.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from functools import cached_property +from importlib.metadata import version +from typing import TYPE_CHECKING + +from numcodecs.zstd import Zstd + +from zarr.abc.codec import BytesBytesCodec +from zarr.core.buffer.cpu import as_numpy_array_wrapper +from zarr.core.common import JSON, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer + + +def parse_zstd_level(data: JSON) -> int: + if isinstance(data, int): + if data >= 23: + raise ValueError(f"Value must be less than or equal to 22. Got {data} instead.") + return data + raise TypeError(f"Got value with type {type(data)}, but expected an int.") + + +def parse_checksum(data: JSON) -> bool: + if isinstance(data, bool): + return data + raise TypeError(f"Expected bool. Got {type(data)}.") + + +@dataclass(frozen=True) +class ZstdCodec(BytesBytesCodec): + is_fixed_size = True + + level: int = 0 + checksum: bool = False + + def __init__(self, *, level: int = 0, checksum: bool = False) -> None: + # numcodecs 0.13.0 introduces the checksum attribute for the zstd codec + _numcodecs_version = tuple(map(int, version("numcodecs").split("."))) + if _numcodecs_version < (0, 13, 0): # pragma: no cover + raise RuntimeError( + "numcodecs version >= 0.13.0 is required to use the zstd codec. " + f"Version {_numcodecs_version} is currently installed." 
+ ) + + level_parsed = parse_zstd_level(level) + checksum_parsed = parse_checksum(checksum) + + object.__setattr__(self, "level", level_parsed) + object.__setattr__(self, "checksum", checksum_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "zstd") + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}} + + @cached_property + def _zstd_codec(self) -> Zstd: + config_dict = {"level": self.level, "checksum": self.checksum} + return Zstd.from_config(config_dict) + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer: + return await asyncio.to_thread( + as_numpy_array_wrapper, self._zstd_codec.decode, chunk_bytes, chunk_spec.prototype + ) + + async def _encode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + return await asyncio.to_thread( + as_numpy_array_wrapper, self._zstd_codec.encode, chunk_bytes, chunk_spec.prototype + ) + + def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: + raise NotImplementedError + + +register_codec("zstd", ZstdCodec) diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py new file mode 100644 index 0000000000..2551d455a4 --- /dev/null +++ b/src/zarr/convenience.py @@ -0,0 +1,35 @@ +import warnings + +from zarr.api.synchronous import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) + +__all__ = [ + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "load", + "open", + "open_consolidated", + "save", + "save_array", + "save_group", + "tree", +] + +warnings.warn( + "zarr.convenience is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) diff --git a/src/zarr/core/__init__.py b/src/zarr/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py new file mode 100644 index 0000000000..ca405842e0 --- /dev/null +++ b/src/zarr/core/array.py @@ -0,0 +1,2580 @@ +from __future__ import annotations + +import json +from asyncio import gather +from dataclasses import dataclass, field, replace +from logging import getLogger +from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload + +import numpy as np +import numpy.typing as npt + +from zarr._compat import _deprecate_positional_args +from zarr.abc.store import Store, set_or_delete +from zarr.codecs import _get_default_array_bytes_codec +from zarr.codecs._v2 import V2Compressor, V2Filters +from zarr.core.attributes import Attributes +from zarr.core.buffer import ( + BufferPrototype, + NDArrayLike, + NDBuffer, + default_buffer_prototype, +) +from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks +from zarr.core.chunk_key_encodings import ( + ChunkKeyEncoding, + DefaultChunkKeyEncoding, + V2ChunkKeyEncoding, +) +from zarr.core.common import ( + JSON, + ZARR_JSON, + ZARRAY_JSON, + ZATTRS_JSON, + ChunkCoords, + ShapeLike, + ZarrFormat, + concurrent_map, + parse_dtype, + parse_shapelike, + product, +) +from zarr.core.config import config, parse_indexing_order +from zarr.core.indexing import ( + BasicIndexer, + BasicSelection, + BlockIndex, + BlockIndexer, + CoordinateIndexer, + CoordinateSelection, + Fields, + Indexer, + MaskIndexer, + MaskSelection, + 
OIndex, + OrthogonalIndexer, + OrthogonalSelection, + Selection, + VIndex, + _iter_grid, + ceildiv, + check_fields, + check_no_multi_fields, + is_pure_fancy_indexing, + is_pure_orthogonal_indexing, + is_scalar, + pop_fields, +) +from zarr.core.metadata import ( + ArrayMetadata, + ArrayMetadataDict, + ArrayV2Metadata, + ArrayV2MetadataDict, + ArrayV3Metadata, + ArrayV3MetadataDict, + T_ArrayMetadata, +) +from zarr.core.metadata.v3 import parse_node_type_array +from zarr.core.sync import collect_aiterator, sync +from zarr.errors import MetadataValidationError +from zarr.registry import get_pipeline_class +from zarr.storage import StoreLike, make_store_path +from zarr.storage.common import StorePath, ensure_no_existing_node + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + from typing import Self + + from zarr.abc.codec import Codec, CodecPipeline + from zarr.core.group import AsyncGroup + +# Array and AsyncArray are defined in the base ``zarr`` namespace +__all__ = ["create_codec_pipeline", "parse_array_metadata"] + +logger = getLogger(__name__) + + +def parse_array_metadata(data: Any) -> ArrayMetadata: + if isinstance(data, ArrayMetadata): + return data + elif isinstance(data, dict): + if data["zarr_format"] == 3: + meta_out = ArrayV3Metadata.from_dict(data) + if len(meta_out.storage_transformers) > 0: + msg = ( + f"Array metadata contains storage transformers: {meta_out.storage_transformers}." + "Arrays with storage transformers are not supported in zarr-python at this time." + ) + raise ValueError(msg) + return meta_out + elif data["zarr_format"] == 2: + return ArrayV2Metadata.from_dict(data) + raise TypeError + + +def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: + if isinstance(metadata, ArrayV3Metadata): + return get_pipeline_class().from_codecs(metadata.codecs) + elif isinstance(metadata, ArrayV2Metadata): + return get_pipeline_class().from_codecs( + [V2Filters(metadata.filters), V2Compressor(metadata.compressor)] + ) + else: + raise TypeError + + +async def get_array_metadata( + store_path: StorePath, zarr_format: ZarrFormat | None = 3 +) -> dict[str, JSON]: + if zarr_format == 2: + zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() + ) + if zarray_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format is None: + zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARR_JSON).get(), + (store_path / ZARRAY_JSON).get(), + (store_path / ZATTRS_JSON).get(), + ) + if zarr_json_bytes is not None and zarray_bytes is not None: + # TODO: revisit this exception type + # alternatively, we could warn and favor v3 + raise ValueError("Both zarr.json and .zarray objects exist") + if zarr_json_bytes is None and zarray_bytes is None: + raise FileNotFoundError(store_path) + # set zarr_format based on which keys were found + if zarr_json_bytes is not None: + zarr_format = 3 + else: + zarr_format = 2 + else: + raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format) + + metadata_dict: dict[str, JSON] + if zarr_format == 2: + # V2 arrays are comprised of a .zarray and .zattrs objects + assert zarray_bytes is not None + metadata_dict = json.loads(zarray_bytes.to_bytes()) + zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + metadata_dict["attributes"] = 
zattrs_dict + else: + # V3 arrays are comprised of a zarr.json object + assert zarr_json_bytes is not None + metadata_dict = json.loads(zarr_json_bytes.to_bytes()) + + parse_node_type_array(metadata_dict.get("node_type")) + + return metadata_dict + + +@dataclass(frozen=True) +class AsyncArray(Generic[T_ArrayMetadata]): + metadata: T_ArrayMetadata + store_path: StorePath + codec_pipeline: CodecPipeline = field(init=False) + order: Literal["C", "F"] + + @overload + def __init__( + self: AsyncArray[ArrayV2Metadata], + metadata: ArrayV2Metadata | ArrayV2MetadataDict, + store_path: StorePath, + order: Literal["C", "F"] | None = None, + ) -> None: ... + + @overload + def __init__( + self: AsyncArray[ArrayV3Metadata], + metadata: ArrayV3Metadata | ArrayV3MetadataDict, + store_path: StorePath, + order: Literal["C", "F"] | None = None, + ) -> None: ... + + def __init__( + self, + metadata: ArrayMetadata | ArrayMetadataDict, + store_path: StorePath, + order: Literal["C", "F"] | None = None, + ) -> None: + if isinstance(metadata, dict): + zarr_format = metadata["zarr_format"] + # TODO: remove this when we extensively type the dict representation of metadata + _metadata = cast(dict[str, JSON], metadata) + if zarr_format == 2: + metadata = ArrayV2Metadata.from_dict(_metadata) + elif zarr_format == 3: + metadata = ArrayV3Metadata.from_dict(_metadata) + else: + raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") + + metadata_parsed = parse_array_metadata(metadata) + order_parsed = parse_indexing_order(order or config.get("array.order")) + + object.__setattr__(self, "metadata", metadata_parsed) + object.__setattr__(self, "store_path", store_path) + object.__setattr__(self, "order", order_parsed) + object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed)) + + # this overload defines the function signature when zarr_format is 2 + @overload + @classmethod + async def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ShapeLike, + dtype: npt.DTypeLike, + zarr_format: Literal[2], + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + chunks: ShapeLike | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV2Metadata]: ... + + # this overload defines the function signature when zarr_format is 3 + @overload + @classmethod + async def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ShapeLike, + dtype: npt.DTypeLike, + zarr_format: Literal[3], + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV3Metadata]: ... 
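As a minimal sketch of the keyword-only API these overloads describe (the store, shape, chunk shape and fill value below are placeholder values, not taken from the patch), the v3 form is invoked roughly as:

    arr = await AsyncArray.create(
        store,                 # any StoreLike accepted by make_store_path
        shape=(100, 100),
        dtype="float32",
        zarr_format=3,
        chunk_shape=(10, 10),
        fill_value=0,
    )

Note that the v2-only keywords (dimension_separator, order, filters, compressor) are rejected when zarr_format=3, per the checks later in create().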
+ + # this overload is necessary to handle the case where the `zarr_format` kwarg is unspecified + @overload + @classmethod + async def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ShapeLike, + dtype: npt.DTypeLike, + zarr_format: Literal[3] = 3, + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV3Metadata]: ... + + @overload + @classmethod + async def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ShapeLike, + dtype: npt.DTypeLike, + zarr_format: ZarrFormat, + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ShapeLike | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ... + + @classmethod + async def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ShapeLike, + dtype: npt.DTypeLike, + zarr_format: ZarrFormat = 3, + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ShapeLike | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + store_path = await make_store_path(store) + + dtype_parsed = parse_dtype(dtype, zarr_format) + shape = parse_shapelike(shape) + + if chunks is not None and chunk_shape is not None: + raise ValueError("Only one of chunk_shape or chunks can be provided.") + + if chunks: + _chunks = normalize_chunks(chunks, shape, dtype_parsed.itemsize) + else: + _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.itemsize) + + result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] + if zarr_format == 3: + if dimension_separator is not None: + raise ValueError( + "dimension_separator cannot be used for arrays with version 3. Use chunk_key_encoding instead." + ) + if order is not None: + raise ValueError( + "order cannot be used for arrays with version 3. Use a transpose codec instead." 
+ ) + if filters is not None: + raise ValueError( + "filters cannot be used for arrays with version 3. Use array-to-array codecs instead." + ) + if compressor is not None: + raise ValueError( + "compressor cannot be used for arrays with version 3. Use bytes-to-bytes codecs instead." + ) + result = await cls._create_v3( + store_path, + shape=shape, + dtype=dtype_parsed, + chunk_shape=_chunks, + fill_value=fill_value, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + attributes=attributes, + exists_ok=exists_ok, + ) + elif zarr_format == 2: + if dtype is str or dtype == "str": + # another special case: zarr v2 added the vlen-utf8 codec + vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"} + if filters and not any(x["id"] == "vlen-utf8" for x in filters): + filters = list(filters) + [vlen_codec] + else: + filters = [vlen_codec] + + if codecs is not None: + raise ValueError( + "codecs cannot be used for arrays with version 2. Use filters and compressor instead." + ) + if chunk_key_encoding is not None: + raise ValueError( + "chunk_key_encoding cannot be used for arrays with version 2. Use dimension_separator instead." + ) + if dimension_names is not None: + raise ValueError("dimension_names cannot be used for arrays with version 2.") + result = await cls._create_v2( + store_path, + shape=shape, + dtype=dtype_parsed, + chunks=_chunks, + dimension_separator=dimension_separator, + fill_value=fill_value, + order=order, + filters=filters, + compressor=compressor, + attributes=attributes, + exists_ok=exists_ok, + ) + else: + raise ValueError(f"Insupported zarr_format. Got: {zarr_format}") + + if data is not None: + # insert user-provided data + await result.setitem(..., data) + + return result + + @classmethod + async def _create_v3( + cls, + store_path: StorePath, + *, + shape: ShapeLike, + dtype: npt.DTypeLike, + chunk_shape: ChunkCoords, + fill_value: Any | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + attributes: dict[str, JSON] | None = None, + exists_ok: bool = False, + ) -> AsyncArray[ArrayV3Metadata]: + if not exists_ok: + await ensure_no_existing_node(store_path, zarr_format=3) + + shape = parse_shapelike(shape) + codecs = ( + list(codecs) + if codecs is not None + else [_get_default_array_bytes_codec(np.dtype(dtype))] + ) + + if chunk_key_encoding is None: + chunk_key_encoding = ("default", "/") + assert chunk_key_encoding is not None + + if isinstance(chunk_key_encoding, tuple): + chunk_key_encoding = ( + V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) + if chunk_key_encoding[0] == "v2" + else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) + ) + + metadata = ArrayV3Metadata( + shape=shape, + data_type=dtype, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + chunk_key_encoding=chunk_key_encoding, + fill_value=fill_value, + codecs=codecs, + dimension_names=tuple(dimension_names) if dimension_names else None, + attributes=attributes or {}, + ) + + array = cls(metadata=metadata, store_path=store_path) + await array._save_metadata(metadata, ensure_parents=True) + return array + + @classmethod + async def _create_v2( + cls, + store_path: StorePath, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike, + chunks: ChunkCoords, + dimension_separator: Literal[".", "/"] | None = None, + fill_value: None | float 
= None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + attributes: dict[str, JSON] | None = None, + exists_ok: bool = False, + ) -> AsyncArray[ArrayV2Metadata]: + if not exists_ok: + await ensure_no_existing_node(store_path, zarr_format=2) + if order is None: + order = "C" + + if dimension_separator is None: + dimension_separator = "." + + metadata = ArrayV2Metadata( + shape=shape, + dtype=np.dtype(dtype), + chunks=chunks, + order=order, + dimension_separator=dimension_separator, + fill_value=fill_value, + compressor=compressor, + filters=filters, + attributes=attributes, + ) + array = cls(metadata=metadata, store_path=store_path) + await array._save_metadata(metadata, ensure_parents=True) + return array + + @classmethod + def from_dict( + cls, + store_path: StorePath, + data: dict[str, JSON], + ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: + metadata = parse_array_metadata(data) + return cls(metadata=metadata, store_path=store_path) + + @classmethod + async def open( + cls, + store: StoreLike, + zarr_format: ZarrFormat | None = 3, + ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: + store_path = await make_store_path(store) + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + # TODO: remove this cast when we have better type hints + _metadata_dict = cast(ArrayV3MetadataDict, metadata_dict) + return cls(store_path=store_path, metadata=_metadata_dict) + + @property + def store(self) -> Store: + return self.store_path.store + + @property + def ndim(self) -> int: + return len(self.metadata.shape) + + @property + def shape(self) -> ChunkCoords: + return self.metadata.shape + + @property + def chunks(self) -> ChunkCoords: + if isinstance(self.metadata.chunk_grid, RegularChunkGrid): + return self.metadata.chunk_grid.chunk_shape + + msg = ( + f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`." + f"This array has a {self.metadata.chunk_grid} instead." + ) + raise NotImplementedError(msg) + + @property + def size(self) -> int: + return np.prod(self.metadata.shape).item() + + @property + def dtype(self) -> np.dtype[Any]: + return self.metadata.dtype + + @property + def attrs(self) -> dict[str, JSON]: + return self.metadata.attributes + + @property + def read_only(self) -> bool: + return self.store_path.store.mode.readonly + + @property + def path(self) -> str: + """Storage path.""" + return self.store_path.path + + @property + def name(self) -> str | None: + """Array name following h5py convention.""" + if self.path: + # follow h5py convention: add leading slash + name = self.path + if name[0] != "/": + name = "/" + name + return name + return None + + @property + def basename(self) -> str | None: + """Final component of name.""" + if self.name is not None: + return self.name.split("/")[-1] + return None + + @property + def cdata_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. + """ + return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + + @property + def nchunks(self) -> int: + """ + The number of chunks in the stored representation of this array. + """ + return product(self.cdata_shape) + + @property + def nchunks_initialized(self) -> int: + """ + The number of chunks that have been persisted in storage. 
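# Worked example (sketch) of the chunk-grid arithmetic behind ``cdata_shape`` and
# ``nchunks`` above: the grid shape is the ceiling division of the array shape by the
# chunk shape, so partial edge chunks still count. Numbers are illustrative only.
import math

shape, chunks = (100, 100), (30, 30)
grid_shape = tuple(math.ceil(s / c) for s, c in zip(shape, chunks))
assert grid_shape == (4, 4)         # cdata_shape
assert math.prod(grid_shape) == 16  # nchunks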
+ """ + return nchunks_initialized(self) + + def _iter_chunk_coords( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. + """ + return _iter_grid(self.cdata_shape, origin=origin, selection_shape=selection_shape) + + def _iter_chunk_keys( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the storage keys of each chunk, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + key: str + The storage key of each chunk in the selection. + """ + # Iterate over the coordinates of chunks in chunk grid space. + for k in self._iter_chunk_coords(origin=origin, selection_shape=selection_shape): + # Encode the chunk key from the chunk coordinates. + yield self.metadata.encode_chunk_key(k) + + def _iter_chunk_regions( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + for cgrid_position in self._iter_chunk_coords( + origin=origin, selection_shape=selection_shape + ): + out: tuple[slice, ...] = () + for c_pos, c_shape in zip(cgrid_position, self.chunks, strict=False): + start = c_pos * c_shape + stop = start + c_shape + out += (slice(start, stop, 1),) + yield out + + @property + def nbytes(self) -> int: + """ + The number of bytes that can be stored in this array. + """ + return self.nchunks * self.dtype.itemsize + + async def _get_selection( + self, + indexer: Indexer, + *, + prototype: BufferPrototype, + out: NDBuffer | None = None, + fields: Fields | None = None, + ) -> NDArrayLike: + # check fields are sensible + out_dtype = check_fields(fields, self.dtype) + + # setup output buffer + if out is not None: + if isinstance(out, NDBuffer): + out_buffer = out + else: + raise TypeError(f"out argument needs to be an NDBuffer. 
Got {type(out)!r}") + if out_buffer.shape != indexer.shape: + raise ValueError( + f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}" + ) + else: + out_buffer = prototype.nd_buffer.create( + shape=indexer.shape, + dtype=out_dtype, + order=self.order, + fill_value=self.metadata.fill_value, + ) + if product(indexer.shape) > 0: + # reading chunks and decoding them + await self.codec_pipeline.read( + [ + ( + self.store_path / self.metadata.encode_chunk_key(chunk_coords), + self.metadata.get_chunk_spec(chunk_coords, self.order, prototype=prototype), + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + out_buffer, + drop_axes=indexer.drop_axes, + ) + return out_buffer.as_ndarray_like() + + async def getitem( + self, + selection: BasicSelection, + *, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BasicIndexer( + selection, + shape=self.metadata.shape, + chunk_grid=self.metadata.chunk_grid, + ) + return await self._get_selection(indexer, prototype=prototype) + + async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = False) -> None: + to_save = metadata.to_buffer_dict(default_buffer_prototype()) + awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()] + + if ensure_parents: + # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. + parents = _build_parents(self) + + for parent in parents: + awaitables.extend( + [ + (parent.store_path / key).set_if_not_exists(value) + for key, value in parent.metadata.to_buffer_dict( + default_buffer_prototype() + ).items() + ] + ) + + await gather(*awaitables) + + async def _set_selection( + self, + indexer: Indexer, + value: npt.ArrayLike, + *, + prototype: BufferPrototype, + fields: Fields | None = None, + ) -> None: + # check fields are sensible + check_fields(fields, self.dtype) + fields = check_no_multi_fields(fields) + + # check value shape + if np.isscalar(value): + array_like = prototype.buffer.create_zero_length().as_array_like() + if isinstance(array_like, np._typing._SupportsArrayFunc): + # TODO: need to handle array types that don't support __array_function__ + # like PyTorch and JAX + array_like_ = cast(np._typing._SupportsArrayFunc, array_like) + value = np.asanyarray(value, dtype=self.metadata.dtype, like=array_like_) + else: + if not hasattr(value, "shape"): + value = np.asarray(value, self.metadata.dtype) + # assert ( + # value.shape == indexer.shape + # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}" + if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name: + if hasattr(value, "astype"): + # Handle things that are already NDArrayLike more efficiently + value = value.astype(dtype=self.metadata.dtype, order="A") + else: + value = np.array(value, dtype=self.metadata.dtype, order="A") + value = cast(NDArrayLike, value) + # We accept any ndarray like object from the user and convert it + # to a NDBuffer (or subclass). From this point onwards, we only pass + # Buffer and NDBuffer between components. 
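# Usage sketch for ``getitem``/``setitem`` defined above: both are coroutines, so they are
# awaited directly (or driven through the synchronous ``Array`` wrapper further below).
# The MemoryStore import path is an assumption about this package layout.
import asyncio

async def _roundtrip_sketch() -> None:
    from zarr.store import MemoryStore  # assumed import path

    arr = await AsyncArray.create(
        MemoryStore(mode="w"), shape=(10,), dtype="int32", zarr_format=3, chunk_shape=(5,)
    )
    await arr.setitem(slice(None), list(range(10)))  # write the whole array
    first_chunk = await arr.getitem(slice(0, 5))     # read back the first chunk
    assert list(first_chunk) == [0, 1, 2, 3, 4]

asyncio.run(_roundtrip_sketch())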
+ value_buffer = prototype.nd_buffer.from_ndarray_like(value) + + # merging with existing data and encoding chunks + await self.codec_pipeline.write( + [ + ( + self.store_path / self.metadata.encode_chunk_key(chunk_coords), + self.metadata.get_chunk_spec(chunk_coords, self.order, prototype), + chunk_selection, + out_selection, + ) + for chunk_coords, chunk_selection, out_selection in indexer + ], + value_buffer, + drop_axes=indexer.drop_axes, + ) + + async def setitem( + self, + selection: BasicSelection, + value: npt.ArrayLike, + prototype: BufferPrototype | None = None, + ) -> None: + if prototype is None: + prototype = default_buffer_prototype() + indexer = BasicIndexer( + selection, + shape=self.metadata.shape, + chunk_grid=self.metadata.chunk_grid, + ) + return await self._set_selection(indexer, value, prototype=prototype) + + async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self: + assert len(new_shape) == len(self.metadata.shape) + new_metadata = self.metadata.update_shape(new_shape) + + # Remove all chunks outside of the new shape + old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) + new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) + + if delete_outside_chunks: + + async def _delete_key(key: str) -> None: + await (self.store_path / key).delete() + + await concurrent_map( + [ + (self.metadata.encode_chunk_key(chunk_coords),) + for chunk_coords in old_chunk_coords.difference(new_chunk_coords) + ], + _delete_key, + config.get("async.concurrency"), + ) + + # Write new metadata + await self._save_metadata(new_metadata) + return replace(self, metadata=new_metadata) + + async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: + # metadata.attributes is "frozen" so we simply clear and update the dict + self.metadata.attributes.clear() + self.metadata.attributes.update(new_attributes) + + # Write new metadata + await self._save_metadata(self.metadata) + + return self + + def __repr__(self) -> str: + return f"" + + async def info(self) -> None: + raise NotImplementedError + + +@dataclass(frozen=True) +class Array: + _async_array: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] + + @classmethod + @_deprecate_positional_args + def create( + cls, + store: StoreLike, + *, + # v2 and v3 + shape: ChunkCoords, + dtype: npt.DTypeLike, + zarr_format: ZarrFormat = 3, + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ChunkCoords | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + ) -> Array: + async_array = sync( + AsyncArray.create( + store=store, + shape=shape, + dtype=dtype, + zarr_format=zarr_format, + attributes=attributes, + fill_value=fill_value, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + chunks=chunks, + dimension_separator=dimension_separator, + order=order, + filters=filters, + compressor=compressor, + exists_ok=exists_ok, + ), + ) + 
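# Hedged sketch of the ``resize`` semantics implemented above: when an array shrinks and
# ``delete_outside_chunks`` is True, chunks lying entirely outside the new shape are
# deleted, while chunks that straddle the new boundary are kept. The MemoryStore import
# path is an assumption about this package layout.
import asyncio

async def _resize_sketch() -> None:
    from zarr.store import MemoryStore  # assumed import path

    arr = await AsyncArray.create(
        MemoryStore(mode="w"), shape=(20,), dtype="int32", zarr_format=3, chunk_shape=(5,)
    )
    await arr.setitem(slice(None), list(range(20)))
    smaller = await arr.resize((8,))  # chunks covering indices 10..19 are deleted
    assert smaller.shape == (8,)

asyncio.run(_resize_sketch())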
return cls(async_array) + + @classmethod + def from_dict( + cls, + store_path: StorePath, + data: dict[str, JSON], + ) -> Array: + async_array = AsyncArray.from_dict(store_path=store_path, data=data) + return cls(async_array) + + @classmethod + def open( + cls, + store: StoreLike, + ) -> Array: + async_array = sync(AsyncArray.open(store)) + return cls(async_array) + + @property + def store(self) -> Store: + return self._async_array.store + + @property + def ndim(self) -> int: + return self._async_array.ndim + + @property + def shape(self) -> ChunkCoords: + return self._async_array.shape + + @property + def chunks(self) -> ChunkCoords: + return self._async_array.chunks + + @property + def size(self) -> int: + return self._async_array.size + + @property + def dtype(self) -> np.dtype[Any]: + return self._async_array.dtype + + @property + def attrs(self) -> Attributes: + return Attributes(self) + + @property + def path(self) -> str: + """Storage path.""" + return self._async_array.path + + @property + def name(self) -> str | None: + """Array name following h5py convention.""" + return self._async_array.name + + @property + def basename(self) -> str | None: + """Final component of name.""" + return self._async_array.basename + + @property + def metadata(self) -> ArrayMetadata: + return self._async_array.metadata + + @property + def store_path(self) -> StorePath: + return self._async_array.store_path + + @property + def order(self) -> Literal["C", "F"]: + return self._async_array.order + + @property + def read_only(self) -> bool: + return self._async_array.read_only + + @property + def fill_value(self) -> Any: + return self.metadata.fill_value + + @property + def cdata_shape(self) -> ChunkCoords: + """ + The shape of the chunk grid for this array. + """ + return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + + @property + def nchunks(self) -> int: + """ + The number of chunks in the stored representation of this array. + """ + return self._async_array.nchunks + + def _iter_chunk_coords( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[ChunkCoords]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: ChunkCoords + The coordinates of each chunk in the selection. + """ + yield from self._async_array._iter_chunk_coords( + origin=origin, selection_shape=selection_shape + ) + + @property + def nbytes(self) -> int: + """ + The number of bytes that can be stored in this array. + """ + return self._async_array.nbytes + + @property + def nchunks_initialized(self) -> int: + """ + The number of chunks that have been initialized in the stored representation of this array. 
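# Usage sketch for the synchronous ``Array`` facade defined above: every property and
# method delegates to the wrapped ``AsyncArray`` through ``sync(...)``, so the blocking
# API mirrors the async one. The MemoryStore import path is an assumption.
from zarr.store import MemoryStore  # assumed import path

z = Array.create(MemoryStore(mode="w"), shape=(4, 4), dtype="int32", chunk_shape=(2, 2))
z[:, :] = 1                                  # __setitem__ -> AsyncArray.setitem via sync()
assert z.shape == (4, 4) and z.nchunks == 4  # 2x2 chunk grid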
+ """ + return self._async_array.nchunks_initialized + + def _iter_chunk_keys( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[str]: + """ + Iterate over the storage keys of each chunk, relative to an optional origin, and optionally + limited to a contiguous region in chunk grid coordinates. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + key: str + The storage key of each chunk in the selection. + """ + yield from self._async_array._iter_chunk_keys( + origin=origin, selection_shape=selection_shape + ) + + def _iter_chunk_regions( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + origin: Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape: Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + yield from self._async_array._iter_chunk_regions( + origin=origin, selection_shape=selection_shape + ) + + def __array__( + self, dtype: npt.DTypeLike | None = None, copy: bool | None = None + ) -> NDArrayLike: + """ + This method is used by numpy when converting zarr.Array into a numpy array. + For more information, see https://numpy.org/devdocs/user/basics.interoperability.html#the-array-method + """ + if copy is False: + msg = "`copy=False` is not supported. This method always creates a copy." + raise ValueError(msg) + + arr_np = self[...] + + if dtype is not None: + arr_np = arr_np.astype(dtype) + + return arr_np + + def __getitem__(self, selection: Selection) -> NDArrayLike: + """Retrieve data for an item or region of the array. + + Parameters + ---------- + selection : tuple + An integer index or slice or tuple of int/slice objects specifying the + requested item or region for each dimension of the array. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested region. + + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(10,), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve a single item:: + + >>> z[5] + 5 + + Retrieve a region via slicing:: + + >>> z[:5] + array([0, 1, 2, 3, 4]) + >>> z[-5:] + array([95, 96, 97, 98, 99]) + >>> z[5:10] + array([5, 6, 7, 8, 9]) + >>> z[5:10:2] + array([5, 7, 9]) + >>> z[::2] + array([ 0, 2, 4, ..., 94, 96, 98]) + + Load the entire array into memory:: + + >>> z[...] 
+ array([ 0, 1, 2, ..., 97, 98, 99]) + + Setup a 2-dimensional array:: + + >>> data = np.arange(100, dtype="uint16").reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(10, 10), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve an item:: + + >>> z[2, 2] + 22 + + Retrieve a region via slicing:: + + >>> z[1:3, 1:3] + array([[11, 12], + [21, 22]]) + >>> z[1:3, :] + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) + >>> z[:, 1:3] + array([[ 1, 2], + [11, 12], + [21, 22], + [31, 32], + [41, 42], + [51, 52], + [61, 62], + [71, 72], + [81, 82], + [91, 92]]) + >>> z[0:5:2, 0:5:2] + array([[ 0, 2, 4], + [20, 22, 24], + [40, 42, 44]]) + >>> z[::2, ::2] + array([[ 0, 2, 4, 6, 8], + [20, 22, 24, 26, 28], + [40, 42, 44, 46, 48], + [60, 62, 64, 66, 68], + [80, 82, 84, 86, 88]]) + + Load the entire array into memory:: + + >>> z[...] + array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], + [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], + [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], + [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], + [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + For arrays with a structured dtype, see zarr v2 for examples of how to use + fields + + Currently the implementation for __getitem__ is provided by + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`set_basic_selection` otherwise. + + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the + methods listed under See Also. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __setitem__ + + """ + fields, pure_selection = pop_fields(selection) + if is_pure_fancy_indexing(pure_selection, self.ndim): + return self.vindex[cast(CoordinateSelection | MaskSelection, selection)] + elif is_pure_orthogonal_indexing(pure_selection, self.ndim): + return self.get_orthogonal_selection(pure_selection, fields=fields) + else: + return self.get_basic_selection(cast(BasicSelection, pure_selection), fields=fields) + + def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: + """Modify data for an item or region of the array. + + Parameters + ---------- + selection : tuple + An integer index or slice or tuple of int/slice specifying the requested + region for each dimension of the array. + value : npt.ArrayLike + An array-like containing the data to be stored in the selection. + + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(100,), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5,), + >>> dtype="i4", + >>> ) + + Set all array elements to the same scalar value:: + + >>> z[...] = 42 + >>> z[...] 
+ array([42, 42, 42, ..., 42, 42, 42]) + + Set a portion of the array:: + + >>> z[:10] = np.arange(10) + >>> z[-10:] = np.arange(10)[::-1] + >>> z[...] + array([ 0, 1, 2, ..., 2, 1, 0]) + + Setup a 2-dimensional array:: + + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) + + Set all array elements to the same scalar value:: + + >>> z[...] = 42 + + Set a portion of the array:: + + >>> z[0, :] = np.arange(z.shape[1]) + >>> z[:, 0] = np.arange(z.shape[0]) + >>> z[...] + array([[ 0, 1, 2, 3, 4], + [ 1, 42, 42, 42, 42], + [ 2, 42, 42, 42, 42], + [ 3, 42, 42, 42, 42], + [ 4, 42, 42, 42, 42]]) + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + For arrays with a structured dtype, see zarr v2 for examples of how to use + fields + + Currently the implementation for __setitem__ is provided by + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`set_basic_selection` otherwise. + + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the + methods listed under See Also. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__ + + """ + fields, pure_selection = pop_fields(selection) + if is_pure_fancy_indexing(pure_selection, self.ndim): + self.vindex[cast(CoordinateSelection | MaskSelection, selection)] = value + elif is_pure_orthogonal_indexing(pure_selection, self.ndim): + self.set_orthogonal_selection(pure_selection, value, fields=fields) + else: + self.set_basic_selection(cast(BasicSelection, pure_selection), value, fields=fields) + + @_deprecate_positional_args + def get_basic_selection( + self, + selection: BasicSelection = Ellipsis, + *, + out: NDBuffer | None = None, + prototype: BufferPrototype | None = None, + fields: Fields | None = None, + ) -> NDArrayLike: + """Retrieve data for an item or region of the array. + + Parameters + ---------- + selection : tuple + A tuple specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested region. 
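# Hedged sketch of the selection dispatch used by ``__getitem__``/``__setitem__`` above:
# a pure tuple of integer arrays is routed to ``vindex`` (fancy/coordinate indexing), a
# mix of index arrays with ints/slices to the orthogonal path, and anything else to the
# basic path. The tuples below only illustrate which route each selection should take.
import numpy as np

expected_routes = [
    ((np.array([0, 2]), np.array([1, 3])), "vindex (pure fancy indexing)"),
    (([0, 2], slice(None)), "oindex (orthogonal indexing)"),
    ((slice(0, 2), 1), "basic indexing"),
]
for selection, route in expected_routes:
    print(route, "<-", selection)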
+ + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3,), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve a single item:: + + >>> z.get_basic_selection(5) + 5 + + Retrieve a region via slicing:: + + >>> z.get_basic_selection(slice(5)) + array([0, 1, 2, 3, 4]) + >>> z.get_basic_selection(slice(-5, None)) + array([95, 96, 97, 98, 99]) + >>> z.get_basic_selection(slice(5, 10)) + array([5, 6, 7, 8, 9]) + >>> z.get_basic_selection(slice(5, 10, 2)) + array([5, 7, 9]) + >>> z.get_basic_selection(slice(None, None, 2)) + array([ 0, 2, 4, ..., 94, 96, 98]) + + Setup a 3-dimensional array:: + + >>> data = np.arange(1000).reshape(10, 10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(5, 5, 5), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve an item:: + + >>> z.get_basic_selection((1, 2, 3)) + 123 + + Retrieve a region via slicing and Ellipsis:: + + >>> z.get_basic_selection((slice(1, 3), slice(1, 3), 0)) + array([[110, 120], + [210, 220]]) + >>> z.get_basic_selection(0, (slice(1, 3), slice(None))) + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) + >>> z.get_basic_selection((..., 5)) + array([[ 2 12 22 32 42 52 62 72 82 92] + [102 112 122 132 142 152 162 172 182 192] + ... + [802 812 822 832 842 852 862 872 882 892] + [902 912 922 932 942 952 962 972 982 992]] + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + For arrays with a structured dtype, see zarr v2 for examples of how to use + the `fields` parameter. + + This method provides the implementation for accessing data via the + square bracket notation (__getitem__). See :func:`__getitem__` for examples + using the alternative notation. + + See Also + -------- + set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + + if prototype is None: + prototype = default_buffer_prototype() + return sync( + self._async_array._get_selection( + BasicIndexer(selection, self.shape, self.metadata.chunk_grid), + out=out, + fields=fields, + prototype=prototype, + ) + ) + + @_deprecate_positional_args + def set_basic_selection( + self, + selection: BasicSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + """Modify data for an item or region of the array. + + Parameters + ---------- + selection : tuple + A tuple specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. 
+ + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(100,), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(100,), + >>> dtype="i4", + >>> ) + + Set all array elements to the same scalar value:: + + >>> z.set_basic_selection(..., 42) + >>> z[...] + array([42, 42, 42, ..., 42, 42, 42]) + + Set a portion of the array:: + + >>> z.set_basic_selection(slice(10), np.arange(10)) + >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1]) + >>> z[...] + array([ 0, 1, 2, ..., 2, 1, 0]) + + Setup a 2-dimensional array:: + + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) + + Set all array elements to the same scalar value:: + + >>> z.set_basic_selection(..., 42) + + Set a portion of the array:: + + >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1])) + >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0])) + >>> z[...] + array([[ 0, 1, 2, 3, 4], + [ 1, 42, 42, 42, 42], + [ 2, 42, 42, 42, 42], + [ 3, 42, 42, 42, 42], + [ 4, 42, 42, 42, 42]]) + + Notes + ----- + For arrays with a structured dtype, see zarr v2 for examples of how to use + the `fields` parameter. + + This method provides the underlying implementation for modifying data via square + bracket notation, see :func:`__setitem__` for equivalent examples using the + alternative notation. + + See Also + -------- + get_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) + sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + + @_deprecate_positional_args + def get_orthogonal_selection( + self, + selection: OrthogonalSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + """Retrieve data by making a selection for each dimension of the array. For + example, if an array has 2 dimensions, allows selecting specific rows and/or + columns. The selection for each dimension can be either an integer (indexing a + single item), a slice, an array of integers, or a Boolean array where True + values indicate a selection. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested selection. 
+ + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100).reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve rows and columns via any combination of int, slice, integer array and/or + Boolean array:: + + >>> z.get_orthogonal_selection(([1, 4], slice(None))) + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + >>> z.get_orthogonal_selection((slice(None), [1, 4])) + array([[ 1, 4], + [11, 14], + [21, 24], + [31, 34], + [41, 44], + [51, 54], + [61, 64], + [71, 74], + [81, 84], + [91, 94]]) + >>> z.get_orthogonal_selection(([1, 4], [1, 4])) + array([[11, 14], + [41, 44]]) + >>> sel = np.zeros(z.shape[0], dtype=bool) + >>> sel[1] = True + >>> sel[4] = True + >>> z.get_orthogonal_selection((sel, sel)) + array([[11, 14], + [41, 44]]) + + For convenience, the orthogonal selection functionality is also available via the + `oindex` property, e.g.:: + + >>> z.oindex[[1, 4], :] + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + >>> z.oindex[:, [1, 4]] + array([[ 1, 4], + [11, 14], + [21, 24], + [31, 34], + [41, 44], + [51, 54], + [61, 64], + [71, 74], + [81, 84], + [91, 94]]) + >>> z.oindex[[1, 4], [1, 4]] + array([[11, 14], + [41, 44]]) + >>> sel = np.zeros(z.shape[0], dtype=bool) + >>> sel[1] = True + >>> sel[4] = True + >>> z.oindex[sel, sel] + array([[11, 14], + [41, 44]]) + + Notes + ----- + Orthogonal indexing is also known as outer indexing. + + Slices with step > 1 are supported, but slices with negative step are not. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) + return sync( + self._async_array._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + ) + + @_deprecate_positional_args + def set_orthogonal_selection( + self, + selection: OrthogonalSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + """Modify data via a selection for each dimension of the array. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + value : npt.ArrayLike + An array-like array containing the data to be stored in the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) + + + Set data for a selection of rows:: + + >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1) + >>> z[...] 
+ array([[0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 1, 1]]) + + Set data for a selection of columns:: + + >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2) + >>> z[...] + array([[0, 2, 0, 0, 2], + [1, 2, 1, 1, 2], + [0, 2, 0, 0, 2], + [0, 2, 0, 0, 2], + [1, 2, 1, 1, 2]]) + + Set data for a selection of rows and columns:: + + >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3) + >>> z[...] + array([[0, 2, 0, 0, 2], + [1, 3, 1, 1, 3], + [0, 2, 0, 0, 2], + [0, 2, 0, 0, 2], + [1, 3, 1, 1, 3]]) + + Set data from a 2D array:: + + >>> values = np.arange(10).reshape(2, 5) + >>> z.set_orthogonal_selection(([0, 3], ...), values) + >>> z[...] + array([[0, 1, 2, 3, 4], + [1, 3, 1, 1, 3], + [0, 2, 0, 0, 2], + [5, 6, 7, 8, 9], + [1, 3, 1, 1, 3]]) + + For convenience, this functionality is also available via the `oindex` property. + E.g.:: + + >>> z.oindex[[1, 4], [1, 4]] = 4 + >>> z[...] + array([[0, 1, 2, 3, 4], + [1, 4, 1, 1, 4], + [0, 2, 0, 0, 2], + [5, 6, 7, 8, 9], + [1, 4, 1, 1, 4]]) + + Notes + ----- + Orthogonal indexing is also known as outer indexing. + + Slices with step > 1 are supported, but slices with negative step are not. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) + return sync( + self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) + ) + + @_deprecate_positional_args + def get_mask_selection( + self, + mask: MaskSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + """Retrieve a selection of individual items, by providing a Boolean array of the + same shape as the array against which the selection is being made, where True + values indicate a selected item. + + Parameters + ---------- + selection : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100).reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve items by specifying a mask:: + + >>> sel = np.zeros_like(z, dtype=bool) + >>> sel[1, 1] = True + >>> sel[4, 4] = True + >>> z.get_mask_selection(sel) + array([11, 44]) + + For convenience, the mask selection functionality is also available via the + `vindex` property, e.g.:: + + >>> z.vindex[sel] + array([11, 44]) + + Notes + ----- + Mask indexing is a form of vectorized or inner indexing, and is equivalent to + coordinate indexing. 
Internally the mask array is converted to coordinate + arrays by calling `np.nonzero`. + + See Also + -------- + get_basic_selection, set_basic_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + """ + + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) + return sync( + self._async_array._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + ) + + @_deprecate_positional_args + def set_mask_selection( + self, + mask: MaskSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + """Modify a selection of individual items, by providing a Boolean array of the + same shape as the array against which the selection is being made, where True + values indicate a selected item. + + Parameters + ---------- + selection : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) + + Set data for a selection of items:: + + >>> sel = np.zeros_like(z, dtype=bool) + >>> sel[1, 1] = True + >>> sel[4, 4] = True + >>> z.set_mask_selection(sel, 1) + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 1]]) + + For convenience, this functionality is also available via the `vindex` property. + E.g.:: + + >>> z.vindex[sel] = 2 + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 2, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 2]]) + + Notes + ----- + Mask indexing is a form of vectorized or inner indexing, and is equivalent to + coordinate indexing. Internally the mask array is converted to coordinate + arrays by calling `np.nonzero`. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) + sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + + @_deprecate_positional_args + def get_coordinate_selection( + self, + selection: CoordinateSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + """Retrieve a selection of individual items, by providing the indices + (coordinates) for each selected item. + + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. 
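# Small numpy-only illustration (sketch) of the broadcasting rule described in the Notes
# below: coordinate arrays are broadcast against each other, and the coordinate selection
# returns an array with the broadcast shape.
import numpy as np

rows = np.array([[1], [4]])  # shape (2, 1)
cols = np.array([1, 4])      # shape (2,)
assert np.broadcast(rows, cols).shape == (2, 2)  # the selection result would be (2, 2)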
+ fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested coordinate selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3, 3), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve items by specifying their coordinates:: + + >>> z.get_coordinate_selection(([1, 4], [1, 4])) + array([11, 44]) + + For convenience, the coordinate selection functionality is also available via the + `vindex` property, e.g.:: + + >>> z.vindex[[1, 4], [1, 4]] + array([11, 44]) + + Notes + ----- + Coordinate indexing is also known as point selection, and is a form of vectorized + or inner indexing. + + Slices are not supported. Coordinate arrays must be provided for all dimensions + of the array. + + Coordinate arrays may be multidimensional, in which case the output array will + also be multidimensional. Coordinate arrays are broadcast against each other + before being applied. The shape of the output will be the same as the shape of + each coordinate array after broadcasting. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) + out_array = sync( + self._async_array._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + ) + + if hasattr(out_array, "shape"): + # restore shape + out_array = np.array(out_array).reshape(indexer.sel_shape) + return out_array + + @_deprecate_positional_args + def set_coordinate_selection( + self, + selection: CoordinateSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + """Modify a selection of individual items, by providing the indices (coordinates) + for each item to be modified. + + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + value : npt.ArrayLike + An array-like containing values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) + + Set data for a selection of items:: + + >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1) + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 1]]) + + For convenience, this functionality is also available via the `vindex` property. + E.g.:: + + >>> z.vindex[[1, 4], [1, 4]] = 2 + >>> z[...] 
+ array([[0, 0, 0, 0, 0], + [0, 2, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 2]]) + + Notes + ----- + Coordinate indexing is also known as point selection, and is a form of vectorized + or inner indexing. + + Slices are not supported. Coordinate arrays must be provided for all dimensions + of the array. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + # setup indexer + indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) + + # handle value - need ndarray-like flatten value + if not is_scalar(value, self.dtype): + try: + from numcodecs.compat import ensure_ndarray_like + + value = ensure_ndarray_like(value) # TODO replace with agnostic + except TypeError: + # Handle types like `list` or `tuple` + value = np.array(value) # TODO replace with agnostic + if hasattr(value, "shape") and len(value.shape) > 1: + value = np.array(value).reshape(-1) + + sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + + @_deprecate_positional_args + def get_block_selection( + self, + selection: BasicSelection, + *, + out: NDBuffer | None = None, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> NDArrayLike: + """Retrieve a selection of individual items, by providing the indices + (coordinates) for each selected item. + + Parameters + ---------- + selection : int or slice or tuple of int or slice + An integer (coordinate) or slice for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + NDArrayLike + An array-like containing the data for the requested block selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3, 3), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve items by specifying their block coordinates:: + + >>> z.get_block_selection((1, slice(None))) + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Which is equivalent to:: + + >>> z[3:6, :] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + For convenience, the block selection functionality is also available via the + `blocks` property, e.g.:: + + >>> z.blocks[1] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. 
However, only with a step size of one. + + Block index arrays may be multidimensional to index multidimensional arrays. + For example:: + + >>> z.blocks[0, 1:3] + array([[ 3, 4, 5, 6, 7, 8], + [13, 14, 15, 16, 17, 18], + [23, 24, 25, 26, 27, 28]]) + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) + return sync( + self._async_array._get_selection( + indexer=indexer, out=out, fields=fields, prototype=prototype + ) + ) + + @_deprecate_positional_args + def set_block_selection( + self, + selection: BasicSelection, + value: npt.ArrayLike, + *, + fields: Fields | None = None, + prototype: BufferPrototype | None = None, + ) -> None: + """Modify a selection of individual blocks, by providing the chunk indices + (coordinates) for each block to be modified. + + Parameters + ---------- + selection : tuple + An integer (coordinate) or slice for each dimension of the array. + value : npt.ArrayLike + An array-like containing the data to be stored in the block selection. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Examples + -------- + Set up a 2-dimensional array:: + + >>> import zarr + >>> z = zarr.zeros( + >>> shape=(6, 6), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(2, 2), + >>> dtype="i4", + >>> ) + + Set data for a selection of items:: + + >>> z.set_block_selection((1, 0), 1) + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + For convenience, this functionality is also available via the `blocks` property. + E.g.:: + + >>> z.blocks[2, 1] = 4 + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 4, 4, 0, 0], + [0, 0, 4, 4, 0, 0]]) + + >>> z.blocks[:, 2] = 7 + >>> z[...] + array([[0, 0, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [0, 0, 4, 4, 7, 7], + [0, 0, 4, 4, 7, 7]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. However, only with a step size of one. 
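# Hedged sketch of the block-to-region mapping behind block selections and ``blocks``:
# block index (i, j) of an array chunked as (ch0, ch1) spans the slices
# (i*ch0:(i+1)*ch0, j*ch1:(j+1)*ch1); edge blocks are additionally clipped to the array
# shape, which this sketch ignores.
chunk_shape = (2, 2)
block_index = (2, 1)
region = tuple(slice(b * c, (b + 1) * c) for b, c in zip(block_index, chunk_shape))
assert region == (slice(4, 6), slice(2, 4))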
+ + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if prototype is None: + prototype = default_buffer_prototype() + indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) + sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + + @property + def vindex(self) -> VIndex: + """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, + :func:`set_coordinate_selection`, :func:`get_mask_selection` and + :func:`set_mask_selection` for documentation and examples.""" + return VIndex(self) + + @property + def oindex(self) -> OIndex: + """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and + :func:`set_orthogonal_selection` for documentation and examples.""" + return OIndex(self) + + @property + def blocks(self) -> BlockIndex: + """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and + :func:`set_block_selection` for documentation and examples.""" + return BlockIndex(self) + + def resize(self, new_shape: ChunkCoords) -> Array: + """ + Change the shape of the array by growing or shrinking one or more + dimensions. + + This method does not modify the original Array object. Instead, it returns a new Array + with the specified shape. + + Notes + ----- + When resizing an array, the data are not rearranged in any way. + + If one or more dimensions are shrunk, any chunks falling outside the + new array shape will be deleted from the underlying store. + However, it is noteworthy that the chunks partially falling inside the new array + (i.e. boundary chunks) will remain intact, and therefore, + the data falling outside the new array but inside the boundary chunks + would be restored by a subsequent resize operation that grows the array size. + + Examples + -------- + >>> import zarr + >>> z = zarr.zeros(shape=(10000, 10000), + >>> chunk_shape=(1000, 1000), + >>> store=StorePath(MemoryStore(mode="w")), + >>> dtype="i4",) + >>> z.shape + (10000, 10000) + >>> z = z.resize(20000, 1000) + >>> z.shape + (20000, 1000) + >>> z2 = z.resize(50, 50) + >>> z.shape + (20000, 1000) + >>> z2.shape + (50, 50) + """ + resized = sync(self._async_array.resize(new_shape)) + # TODO: remove this cast when type inference improves + _resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized) + return type(self)(_resized) + + def update_attributes(self, new_attributes: dict[str, JSON]) -> Array: + # TODO: remove this cast when type inference improves + new_array = sync(self._async_array.update_attributes(new_attributes)) + # TODO: remove this cast when type inference improves + _new_array = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], new_array) + return type(self)(_new_array) + + def __repr__(self) -> str: + return f"" + + def info(self) -> None: + return sync( + self._async_array.info(), + ) + + +def nchunks_initialized( + array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array, +) -> int: + """ + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. + + Parameters + ---------- + array : Array + The array to inspect. + + Returns + ------- + nchunks_initialized : int + The number of chunks that have been initialized. 
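+
+    Examples
+    --------
+    A minimal usage sketch; ``arr`` is assumed to be any existing array whose
+    chunks may or may not have been written yet::
+
+        >>> from zarr.core.array import nchunks_initialized
+        >>> n = nchunks_initialized(arr)   # number of chunks currently persisted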
+ + See Also + -------- + chunks_initialized + """ + return len(chunks_initialized(array)) + + +def chunks_initialized( + array: Array | AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], +) -> tuple[str, ...]: + """ + Return the keys of the chunks that have been persisted to the storage backend. + + Parameters + ---------- + array : Array + The array to inspect. + + Returns + ------- + chunks_initialized : tuple[str, ...] + The keys of the chunks that have been initialized. + + See Also + -------- + nchunks_initialized + + """ + # TODO: make this compose with the underlying async iterator + store_contents = list( + collect_aiterator(array.store_path.store.list_prefix(prefix=array.store_path.path)) + ) + out: list[str] = [] + + for chunk_key in array._iter_chunk_keys(): + if chunk_key in store_contents: + out.append(chunk_key) + + return tuple(out) + + +def _build_parents( + node: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup, +) -> list[AsyncGroup]: + from zarr.core.group import AsyncGroup, GroupMetadata + + store = node.store_path.store + path = node.store_path.path + if not path: + return [] + + required_parts = path.split("/")[:-1] + parents = [ + # the root group + AsyncGroup( + metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), + store_path=StorePath(store=store, path=""), + ) + ] + + for i, part in enumerate(required_parts): + p = "/".join(required_parts[:i] + [part]) + parents.append( + AsyncGroup( + metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), + store_path=StorePath(store=store, path=p), + ) + ) + + return parents diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py new file mode 100644 index 0000000000..e84a81cb05 --- /dev/null +++ b/src/zarr/core/array_spec.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Literal + +import numpy as np + +from zarr.core.common import parse_fill_value, parse_order, parse_shapelike + +if TYPE_CHECKING: + from zarr.core.buffer import BufferPrototype + from zarr.core.common import ChunkCoords + + +@dataclass(frozen=True) +class ArraySpec: + shape: ChunkCoords + dtype: np.dtype[Any] + fill_value: Any + order: Literal["C", "F"] + prototype: BufferPrototype + + def __init__( + self, + shape: ChunkCoords, + dtype: np.dtype[Any], + fill_value: Any, + order: Literal["C", "F"], + prototype: BufferPrototype, + ) -> None: + shape_parsed = parse_shapelike(shape) + dtype_parsed = np.dtype(dtype) + fill_value_parsed = parse_fill_value(fill_value) + order_parsed = parse_order(order) + + object.__setattr__(self, "shape", shape_parsed) + object.__setattr__(self, "dtype", dtype_parsed) + object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "order", order_parsed) + object.__setattr__(self, "prototype", prototype) + + @property + def ndim(self) -> int: + return len(self.shape) diff --git a/src/zarr/core/attributes.py b/src/zarr/core/attributes.py new file mode 100644 index 0000000000..7f9864d1b5 --- /dev/null +++ b/src/zarr/core/attributes.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from collections.abc import MutableMapping +from typing import TYPE_CHECKING + +from zarr.core.common import JSON + +if TYPE_CHECKING: + from collections.abc import Iterator + + from zarr.core.array import Array + from zarr.core.group import Group + + +class Attributes(MutableMapping[str, JSON]): + def __init__(self, obj: Array | Group) -> None: + # key=".zattrs", read_only=False, 
cache=True, synchronizer=None + self._obj = obj + + def __getitem__(self, key: str) -> JSON: + return self._obj.metadata.attributes[key] + + def __setitem__(self, key: str, value: JSON) -> None: + new_attrs = dict(self._obj.metadata.attributes) + new_attrs[key] = value + self._obj = self._obj.update_attributes(new_attrs) + + def __delitem__(self, key: str) -> None: + new_attrs = dict(self._obj.metadata.attributes) + del new_attrs[key] + self._obj = self._obj.update_attributes(new_attrs) + + def __iter__(self) -> Iterator[str]: + return iter(self._obj.metadata.attributes) + + def __len__(self) -> int: + return len(self._obj.metadata.attributes) + + def put(self, d: dict[str, JSON]) -> None: + """ + Overwrite all attributes with the values from `d`. + + Equivalent to the following pseudo-code, but performed atomically. + + .. code-block:: python + + >>> attrs = {"a": 1, "b": 2} + >>> attrs.clear() + >>> attrs.update({"a": 3", "c": 4}) + >>> attrs + {'a': 3, 'c': 4} + """ + self._obj = self._obj.update_attributes(d) + + def asdict(self) -> dict[str, JSON]: + return dict(self._obj.metadata.attributes) diff --git a/src/zarr/core/buffer/__init__.py b/src/zarr/core/buffer/__init__.py new file mode 100644 index 0000000000..ccb41e291c --- /dev/null +++ b/src/zarr/core/buffer/__init__.py @@ -0,0 +1,19 @@ +from zarr.core.buffer.core import ( + ArrayLike, + Buffer, + BufferPrototype, + NDArrayLike, + NDBuffer, + default_buffer_prototype, +) +from zarr.core.buffer.cpu import numpy_buffer_prototype + +__all__ = [ + "ArrayLike", + "Buffer", + "BufferPrototype", + "NDArrayLike", + "NDBuffer", + "default_buffer_prototype", + "numpy_buffer_prototype", +] diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py new file mode 100644 index 0000000000..1fbf58c618 --- /dev/null +++ b/src/zarr/core/buffer/core.py @@ -0,0 +1,504 @@ +from __future__ import annotations + +import sys +from abc import ABC, abstractmethod +from typing import ( + TYPE_CHECKING, + Any, + Literal, + NamedTuple, + Protocol, + SupportsIndex, + cast, + runtime_checkable, +) + +import numpy as np +import numpy.typing as npt + +from zarr.registry import ( + get_buffer_class, + get_ndbuffer_class, +) + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + from typing import Self + + from zarr.codecs.bytes import Endian + from zarr.core.common import BytesLike, ChunkCoords + +# Everything here is imported into ``zarr.core.buffer`` namespace. +__all__: list[str] = [] + + +@runtime_checkable +class ArrayLike(Protocol): + """Protocol for the array-like type that underlie Buffer""" + + @property + def dtype(self) -> np.dtype[Any]: ... + + @property + def ndim(self) -> int: ... + + @property + def size(self) -> int: ... + + def __getitem__(self, key: slice) -> Self: ... + + def __setitem__(self, key: slice, value: Any) -> None: ... + + +@runtime_checkable +class NDArrayLike(Protocol): + """Protocol for the nd-array-like type that underlie NDBuffer""" + + @property + def dtype(self) -> np.dtype[Any]: ... + + @property + def ndim(self) -> int: ... + + @property + def size(self) -> int: ... + + @property + def shape(self) -> ChunkCoords: ... + + def __len__(self) -> int: ... + + def __getitem__(self, key: slice) -> Self: ... + + def __setitem__(self, key: slice, value: Any) -> None: ... + + def __array__(self) -> npt.NDArray[Any]: ... + + def reshape( + self, shape: ChunkCoords | Literal[-1], *, order: Literal["A", "C", "F"] = ... + ) -> Self: ... + + def view(self, dtype: npt.DTypeLike) -> Self: ... 
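+
+    # Illustrative note: because this protocol is ``runtime_checkable``, a check
+    # such as ``isinstance(np.empty(3), NDArrayLike)`` passes for NumPy arrays
+    # (and for CuPy arrays when cupy is installed), since they expose every
+    # attribute declared here; runtime protocol checks only verify attribute
+    # presence, not signatures.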
+ + def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = ...) -> Self: ... + + def fill(self, value: Any) -> None: ... + + def copy(self) -> Self: ... + + def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self: ... + + def ravel(self, order: Literal["K", "A", "C", "F"] = ...) -> Self: ... + + def all(self) -> bool: ... + + def __eq__(self, other: object) -> Self: # type: ignore[explicit-override, override] + """Element-wise equal + + Notes + ----- + Type checkers such as mypy complains because the return type isn't a bool like + its supertype "object", which violates the Liskov substitution principle. + This is true, but since NumPy's ndarray is defined as an element-wise equal, + our hands are tied. + """ + + +def check_item_key_is_1d_contiguous(key: Any) -> None: + """Raises error if `key` isn't a 1d contiguous slice""" + if not isinstance(key, slice): + raise TypeError( + f"Item key has incorrect type (expected slice, got {key.__class__.__name__})" + ) + if not (key.step is None or key.step == 1): + raise ValueError("slice must be contiguous") + + +class Buffer(ABC): + """A flat contiguous memory block + + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying array-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + array-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + """ + + def __init__(self, array_like: ArrayLike) -> None: + if array_like.ndim != 1: + raise ValueError("array_like: only 1-dim allowed") + if array_like.dtype != np.dtype("b"): + raise ValueError("array_like: only byte dtype allowed") + self._data = array_like + + @classmethod + @abstractmethod + def create_zero_length(cls) -> Self: + """Create an empty buffer with length zero + + Returns + ------- + New empty 0-length buffer + """ + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker + + @classmethod + def from_array_like(cls, array_like: ArrayLike) -> Self: + """Create a new buffer of an array-like object + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + + Returns + ------- + New buffer representing `array_like` + """ + return cls(array_like) + + @classmethod + @abstractmethod + def from_buffer(cls, buffer: Buffer) -> Self: + """Create a new buffer of an existing Buffer + + This is useful if you want to ensure that an existing buffer is + of the correct subclass of Buffer. E.g., MemoryStore uses this + to return a buffer instance of the subclass specified by its + BufferPrototype argument. + + Typically, this only copies data if the data has to be moved between + memory types, such as from host to device memory. + + Parameters + ---------- + buffer + buffer object. 
+ + Returns + ------- + A new buffer representing the content of the input buffer + + Notes + ----- + Subclasses of `Buffer` must override this method to implement + more optimal conversions that avoid copies where possible + """ + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker + + @classmethod + @abstractmethod + def from_bytes(cls, bytes_like: BytesLike) -> Self: + """Create a new buffer of a bytes-like object (host memory) + + Parameters + ---------- + bytes_like + bytes-like object + + Returns + ------- + New buffer representing `bytes_like` + """ + if cls is Buffer: + raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") + return cls( + cast(ArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker + + def as_array_like(self) -> ArrayLike: + """Returns the underlying array (host or device memory) of this buffer + + This will never copy data. + + Returns + ------- + The underlying 1d array such as a NumPy or CuPy array. + """ + return self._data + + @abstractmethod + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Notes + ----- + Might have to copy data, consider using `.as_array_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + ... + + def to_bytes(self) -> bytes: + """Returns the buffer as `bytes` (host memory). + + Warnings + -------- + Will always copy data, only use this method for small buffers such as metadata + buffers. If possible, use `.as_numpy_array()` or `.as_array_like()` instead. + + Returns + ------- + `bytes` of this buffer (data copy) + """ + return bytes(self.as_numpy_array()) + + def __getitem__(self, key: slice) -> Self: + check_item_key_is_1d_contiguous(key) + return self.__class__(self._data.__getitem__(key)) + + def __setitem__(self, key: slice, value: Any) -> None: + check_item_key_is_1d_contiguous(key) + self._data.__setitem__(key, value) + + def __len__(self) -> int: + return self._data.size + + @abstractmethod + def __add__(self, other: Buffer) -> Self: + """Concatenate two buffers""" + ... + + def __eq__(self, other: object) -> bool: + # Another Buffer class can override this to choose a more efficient path + return isinstance(other, Buffer) and np.array_equal( + self.as_numpy_array(), other.as_numpy_array() + ) + + +class NDBuffer: + """An n-dimensional memory block + + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. + + A NDBuffer is backed by a underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. 
+ """ + + def __init__(self, array: NDArrayLike) -> None: + self._data = array + + @classmethod + @abstractmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + """Create a new buffer and its underlying ndarray-like object + + Parameters + ---------- + shape + The shape of the buffer and its underlying ndarray-like object + dtype + The datatype of the buffer and its underlying ndarray-like object + order + Whether to store multi-dimensional data in row-major (C-style) or + column-major (Fortran-style) order in memory. + fill_value + If not None, fill the new buffer with a scalar value. + + Returns + ------- + New buffer representing a new ndarray_like object + + Notes + ----- + A subclass can overwrite this method to create a ndarray-like object + other then the default Numpy array. + """ + if cls is NDBuffer: + raise NotImplementedError( + "Cannot call abstract method on the abstract class 'NDBuffer'" + ) + return cls( + cast(NDArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker + + @classmethod + def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: + """Create a new buffer of a ndarray-like object + + Parameters + ---------- + ndarray_like + ndarray-like object + + Returns + ------- + New buffer representing `ndarray_like` + """ + return cls(ndarray_like) + + @classmethod + @abstractmethod + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: + """Create a new buffer of Numpy array-like object + + Parameters + ---------- + array_like + Object that can be coerced into a Numpy array + + Returns + ------- + New buffer representing `array_like` + """ + if cls is NDBuffer: + raise NotImplementedError( + "Cannot call abstract method on the abstract class 'NDBuffer'" + ) + return cls( + cast(NDArrayLike, None) + ) # This line will never be reached, but it satisfies the type checker + + def as_ndarray_like(self) -> NDArrayLike: + """Returns the underlying array (host or device memory) of this buffer + + This will never copy data. + + Returns + ------- + The underlying array such as a NumPy or CuPy array. + """ + return self._data + + @abstractmethod + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Warnings + -------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + ... + + @property + def dtype(self) -> np.dtype[Any]: + return self._data.dtype + + @property + def shape(self) -> tuple[int, ...]: + return self._data.shape + + @property + def byteorder(self) -> Endian: + from zarr.codecs.bytes import Endian + + if self.dtype.byteorder == "<": + return Endian.little + elif self.dtype.byteorder == ">": + return Endian.big + else: + return Endian(sys.byteorder) + + def reshape(self, newshape: ChunkCoords | Literal[-1]) -> Self: + return self.__class__(self._data.reshape(newshape)) + + def squeeze(self, axis: tuple[int, ...]) -> Self: + newshape = tuple(a for i, a in enumerate(self.shape) if i not in axis) + return self.__class__(self._data.reshape(newshape)) + + def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: + return self.__class__(self._data.astype(dtype=dtype, order=order)) + + @abstractmethod + def __getitem__(self, key: Any) -> Self: ... + + @abstractmethod + def __setitem__(self, key: Any, value: Any) -> None: ... 
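+
+    # Usage sketch (illustrative): concrete subclasses such as
+    # ``zarr.core.buffer.cpu.NDBuffer`` are typically built via ``create`` or
+    # ``from_numpy_array`` and unwrapped again with ``as_ndarray_like`` /
+    # ``as_numpy_array``, e.g.::
+    #
+    #     from zarr.core.buffer import cpu
+    #     buf = cpu.NDBuffer.create(shape=(2, 3), dtype="f8", fill_value=0)
+    #     host_arr = buf.as_numpy_array()               # NumPy array with the same data
+    #     buf2 = cpu.NDBuffer.from_numpy_array(host_arr)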
+ + def __len__(self) -> int: + return self._data.__len__() + + def __repr__(self) -> str: + return f"" + + def all_equal(self, other: Any, equal_nan: bool = True) -> bool: + """Compare to `other` using np.array_equal.""" + if other is None: + # Handle None fill_value for Zarr V2 + return False + # use array_equal to obtain equal_nan=True functionality + # Since fill-value is a scalar, isn't there a faster path than allocating a new array for fill value + # every single time we have to write data? + _data, other = np.broadcast_arrays(self._data, other) + return np.array_equal( + self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "USTO" else False + ) + + def fill(self, value: Any) -> None: + self._data.fill(value) + + def copy(self) -> Self: + return self.__class__(self._data.copy()) + + def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self: + return self.__class__(self._data.transpose(axes)) + + +class BufferPrototype(NamedTuple): + """Prototype of the Buffer and NDBuffer class + + The protocol must be pickable. + + Attributes + ---------- + buffer + The Buffer class to use when Zarr needs to create new Buffer. + nd_buffer + The NDBuffer class to use when Zarr needs to create new NDBuffer. + """ + + buffer: type[Buffer] + nd_buffer: type[NDBuffer] + + +# The default buffer prototype used throughout the Zarr codebase. +def default_buffer_prototype() -> BufferPrototype: + return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class()) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py new file mode 100644 index 0000000000..187e2d82dc --- /dev/null +++ b/src/zarr/core/buffer/cpu.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Literal, +) + +import numpy as np +import numpy.typing as npt + +from zarr.core.buffer import core +from zarr.registry import ( + register_buffer, + register_ndbuffer, +) + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + from typing import Self + + from zarr.core.buffer.core import ArrayLike, NDArrayLike + from zarr.core.common import BytesLike + + +class Buffer(core.Buffer): + """A flat contiguous memory block + + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying array-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + array-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + """ + + def __init__(self, array_like: ArrayLike) -> None: + super().__init__(array_like) + + @classmethod + def create_zero_length(cls) -> Self: + return cls(np.array([], dtype="b")) + + @classmethod + def from_buffer(cls, buffer: core.Buffer) -> Self: + """Create a new buffer of an existing Buffer + + This is useful if you want to ensure that an existing buffer is + of the correct subclass of Buffer. E.g., MemoryStore uses this + to return a buffer instance of the subclass specified by its + BufferPrototype argument. + + Typically, this only copies data if the data has to be moved between + memory types, such as from host to device memory. + + Parameters + ---------- + buffer + buffer object. 
+ + Returns + ------- + A new buffer representing the content of the input buffer + + Notes + ----- + Subclasses of `Buffer` must override this method to implement + more optimal conversions that avoid copies where possible + """ + return cls.from_array_like(buffer.as_numpy_array()) + + @classmethod + def from_bytes(cls, bytes_like: BytesLike) -> Self: + """Create a new buffer of a bytes-like object (host memory) + + Parameters + ---------- + bytes_like + bytes-like object + + Returns + ------- + New buffer representing `bytes_like` + """ + return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Notes + ----- + Might have to copy data, consider using `.as_array_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return np.asanyarray(self._data) + + def __add__(self, other: core.Buffer) -> Self: + """Concatenate two buffers""" + + other_array = other.as_array_like() + assert other_array.dtype == np.dtype("b") + return self.__class__( + np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array))) + ) + + +class NDBuffer(core.NDBuffer): + """An n-dimensional memory block + + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. + + A NDBuffer is backed by a underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. + """ + + def __init__(self, array: NDArrayLike) -> None: + super().__init__(array) + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + @classmethod + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: + return cls.from_ndarray_like(np.asanyarray(array_like)) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Warnings + -------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return np.asanyarray(self._data) + + def __getitem__(self, key: Any) -> Self: + return self.__class__(np.asanyarray(self._data.__getitem__(key))) + + def __setitem__(self, key: Any, value: Any) -> None: + if isinstance(value, NDBuffer): + value = value._data + self._data.__setitem__(key, value) + + +def as_numpy_array_wrapper( + func: Callable[[npt.NDArray[Any]], bytes], buf: core.Buffer, prototype: core.BufferPrototype +) -> core.Buffer: + """Converts the input of `func` to a numpy array and the output back to `Buffer`. 
+ + This function is useful when calling a `func` that only support host memory such + as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert + the input `buf` to a Numpy array and convert the result back into a `Buffer`. + + Parameters + ---------- + func + The callable that will be called with the converted `buf` as input. + `func` must return bytes, which will be converted into a `Buffer` + before returned. + buf + The buffer that will be converted to a Numpy array before given as + input to `func`. + prototype + The prototype of the output buffer. + + Returns + ------- + The result of `func` converted to a `Buffer` + """ + return prototype.buffer.from_bytes(func(buf.as_numpy_array())) + + +# CPU buffer prototype using numpy arrays +buffer_prototype = core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) +# default_buffer_prototype = buffer_prototype + + +# The numpy prototype used for E.g. when reading the shard index +def numpy_buffer_prototype() -> core.BufferPrototype: + return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) + + +register_buffer(Buffer) +register_ndbuffer(NDBuffer) diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py new file mode 100644 index 0000000000..d5daba0e9a --- /dev/null +++ b/src/zarr/core/buffer/gpu.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import warnings +from typing import ( + TYPE_CHECKING, + Any, + Literal, + cast, +) + +import numpy as np +import numpy.typing as npt + +from zarr.core.buffer import core +from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike + +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import Self + + from zarr.core.common import BytesLike + +try: + import cupy as cp +except ImportError: + cp = None + + +class Buffer(core.Buffer): + """A flat contiguous memory block on the GPU + + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying array-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + array-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. + """ + + def __init__(self, array_like: ArrayLike) -> None: + if cp is None: + raise ImportError( + "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy." 
+ ) + + if array_like.ndim != 1: + raise ValueError("array_like: only 1-dim allowed") + if array_like.dtype != np.dtype("b"): + raise ValueError("array_like: only byte dtype allowed") + + if not hasattr(array_like, "__cuda_array_interface__"): + # Slow copy based path for arrays that don't support the __cuda_array_interface__ + # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol + msg = ( + "Creating a zarr.buffer.gpu.Buffer with an array that does not support the " + "__cuda_array_interface__ for zero-copy transfers, " + "falling back to slow copy based path" + ) + warnings.warn( + msg, + stacklevel=2, + ) + self._data = cp.asarray(array_like) + + @classmethod + def create_zero_length(cls) -> Self: + """Create an empty buffer with length zero + + Returns + ------- + New empty 0-length buffer + """ + return cls(cp.array([], dtype="b")) + + @classmethod + def from_buffer(cls, buffer: core.Buffer) -> Self: + """Create an GPU Buffer given an arbitrary Buffer + This will try to be zero-copy if `buffer` is already on the + GPU and will trigger a copy if not. + + Returns + ------- + New GPU Buffer constructed from `buffer` + """ + return cls(buffer.as_array_like()) + + @classmethod + def from_bytes(cls, bytes_like: BytesLike) -> Self: + return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b")) + + def as_numpy_array(self) -> npt.NDArray[Any]: + return cast(npt.NDArray[Any], cp.asnumpy(self._data)) + + def __add__(self, other: core.Buffer) -> Self: + other_array = other.as_array_like() + assert other_array.dtype == np.dtype("b") + gpu_other = Buffer(other_array) + gpu_other_array = gpu_other.as_array_like() + return self.__class__( + cp.concatenate((cp.asanyarray(self._data), cp.asanyarray(gpu_other_array))) + ) + + +class NDBuffer(core.NDBuffer): + """A n-dimensional memory block on the GPU + + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. + + A NDBuffer is backed by a underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Notes + ----- + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. + """ + + def __init__(self, array: NDArrayLike) -> None: + if cp is None: + raise ImportError( + "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy." 
+ ) + + # assert array.ndim > 0 + assert array.dtype != object + self._data = array + + if not hasattr(array, "__cuda_array_interface__"): + # Slow copy based path for arrays that don't support the __cuda_array_interface__ + # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol + msg = ( + "Creating a zarr.buffer.gpu.NDBuffer with an array that does not support the " + "__cuda_array_interface__ for zero-copy transfers, " + "falling back to slow copy based path" + ) + warnings.warn( + msg, + stacklevel=2, + ) + self._data = cp.asarray(array) + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + ret = cls(cp.empty(shape=tuple(shape), dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + @classmethod + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: + """Create a new buffer of Numpy array-like object + + Parameters + ---------- + array_like + Object that can be coerced into a Numpy array + + Returns + ------- + New buffer representing `array_like` + """ + return cls(cp.asarray(array_like)) + + def as_numpy_array(self) -> npt.NDArray[Any]: + """Returns the buffer as a NumPy array (host memory). + + Warnings + -------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Returns + ------- + NumPy array of this buffer (might be a data copy) + """ + return cast(npt.NDArray[Any], cp.asnumpy(self._data)) + + def __getitem__(self, key: Any) -> Self: + return self.__class__(self._data.__getitem__(key)) + + def __setitem__(self, key: Any, value: Any) -> None: + if isinstance(value, NDBuffer): + value = value._data + elif isinstance(value, core.NDBuffer): + gpu_value = NDBuffer(value.as_ndarray_like()) + value = gpu_value._data + self._data.__setitem__(key, value) + + +buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py new file mode 100644 index 0000000000..77734056b3 --- /dev/null +++ b/src/zarr/core/chunk_grids.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +import itertools +import math +import numbers +import operator +from abc import abstractmethod +from dataclasses import dataclass +from functools import reduce +from typing import TYPE_CHECKING, Any + +import numpy as np + +from zarr.abc.metadata import Metadata +from zarr.core.common import ( + JSON, + ChunkCoords, + ChunkCoordsLike, + ShapeLike, + parse_named_configuration, + parse_shapelike, +) +from zarr.core.indexing import ceildiv + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Self + + +def _guess_chunks( + shape: ShapeLike, + typesize: int, + *, + increment_bytes: int = 256 * 1024, + min_bytes: int = 128 * 1024, + max_bytes: int = 64 * 1024 * 1024, +) -> ChunkCoords: + """ + Iteratively guess an appropriate chunk layout for an array, given its shape and + the size of each element in bytes, and size constraints expressed in bytes. This logic is + adapted from h5py. + + Parameters + ---------- + shape: ChunkCoords + The chunk shape. + typesize: int + The size, in bytes, of each element of the chunk. + increment_bytes: int = 256 * 1024 + The number of bytes used to increment or decrement the target chunk size in bytes. + min_bytes: int = 128 * 1024 + The soft lower bound on the final chunk size in bytes. 
+ max_bytes: int = 64 * 1024 * 1024 + The hard upper bound on the final chunk size in bytes. + + Returns + ------- + ChunkCoords + + """ + if isinstance(shape, int): + shape = (shape,) + + ndims = len(shape) + # require chunks to have non-zero length for all dimensions + chunks = np.maximum(np.array(shape, dtype="=f8"), 1) + + # Determine the optimal chunk size in bytes using a PyTables expression. + # This is kept as a float. + dset_size = np.prod(chunks) * typesize + target_size = increment_bytes * (2 ** np.log10(dset_size / (1024.0 * 1024))) + + if target_size > max_bytes: + target_size = max_bytes + elif target_size < min_bytes: + target_size = min_bytes + + idx = 0 + while True: + # Repeatedly loop over the axes, dividing them by 2. Stop when: + # 1a. We're smaller than the target chunk size, OR + # 1b. We're within 50% of the target chunk size, AND + # 2. The chunk is smaller than the maximum chunk size + + chunk_bytes = np.prod(chunks) * typesize + + if ( + chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 + ) and chunk_bytes < max_bytes: + break + + if np.prod(chunks) == 1: + break # Element size larger than max_bytes + + chunks[idx % ndims] = math.ceil(chunks[idx % ndims] / 2.0) + idx += 1 + + return tuple(int(x) for x in chunks) + + +def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]: + """Convenience function to normalize the `chunks` argument for an array + with the given `shape`.""" + + # N.B., expect shape already normalized + + # handle auto-chunking + if chunks is None or chunks is True: + return _guess_chunks(shape, typesize) + + # handle no chunking + if chunks is False: + return shape + + # handle 1D convenience form + if isinstance(chunks, numbers.Integral): + chunks = tuple(int(chunks) for _ in shape) + + # handle dask-style chunks (iterable of iterables) + if all(isinstance(c, (tuple | list)) for c in chunks): + # take first chunk size for each dimension + chunks = tuple( + c[0] for c in chunks + ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) + + # handle bad dimensionality + if len(chunks) > len(shape): + raise ValueError("too many dimensions in chunks") + + # handle underspecified chunks + if len(chunks) < len(shape): + # assume chunks across remaining dimensions + chunks += shape[len(chunks) :] + + # handle None or -1 in chunks + if -1 in chunks or None in chunks: + chunks = tuple( + s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False) + ) + + return tuple(int(c) for c in chunks) + + +@dataclass(frozen=True) +class ChunkGrid(Metadata): + @classmethod + def from_dict(cls, data: dict[str, JSON] | ChunkGrid) -> ChunkGrid: + if isinstance(data, ChunkGrid): + return data + + name_parsed, _ = parse_named_configuration(data) + if name_parsed == "regular": + return RegularChunkGrid._from_dict(data) + raise ValueError(f"Unknown chunk grid. 
Got {name_parsed}.") + + @abstractmethod + def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]: + pass + + @abstractmethod + def get_nchunks(self, array_shape: ChunkCoords) -> int: + pass + + +@dataclass(frozen=True) +class RegularChunkGrid(ChunkGrid): + chunk_shape: ChunkCoords + + def __init__(self, *, chunk_shape: ChunkCoordsLike) -> None: + chunk_shape_parsed = parse_shapelike(chunk_shape) + + object.__setattr__(self, "chunk_shape", chunk_shape_parsed) + + @classmethod + def _from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration(data, "regular") + + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}} + + def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]: + return itertools.product( + *(range(0, ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) + ) + + def get_nchunks(self, array_shape: ChunkCoords) -> int: + return reduce( + operator.mul, + (ceildiv(s, c) for s, c in zip(array_shape, self.chunk_shape, strict=True)), + 1, + ) diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py new file mode 100644 index 0000000000..ed12ee3065 --- /dev/null +++ b/src/zarr/core/chunk_key_encodings.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from abc import abstractmethod +from dataclasses import dataclass +from typing import Literal, cast + +from zarr.abc.metadata import Metadata +from zarr.core.common import ( + JSON, + ChunkCoords, + parse_named_configuration, +) + +SeparatorLiteral = Literal[".", "/"] + + +def parse_separator(data: JSON) -> SeparatorLiteral: + if data not in (".", "/"): + raise ValueError(f"Expected an '.' or '/' separator. Got {data} instead.") + return cast(SeparatorLiteral, data) + + +@dataclass(frozen=True) +class ChunkKeyEncoding(Metadata): + name: str + separator: SeparatorLiteral = "." + + def __init__(self, *, separator: SeparatorLiteral) -> None: + separator_parsed = parse_separator(separator) + + object.__setattr__(self, "separator", separator_parsed) + + @classmethod + def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncoding) -> ChunkKeyEncoding: + if isinstance(data, ChunkKeyEncoding): + return data + + # configuration is optional for chunk key encodings + name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False) + if name_parsed == "default": + if config_parsed is None: + # for default, normalize missing configuration to use the "/" separator. + config_parsed = {"separator": "/"} + return DefaultChunkKeyEncoding(**config_parsed) # type: ignore[arg-type] + if name_parsed == "v2": + if config_parsed is None: + # for v2, normalize missing configuration to use the "." separator. + config_parsed = {"separator": "."} + return V2ChunkKeyEncoding(**config_parsed) # type: ignore[arg-type] + msg = f"Unknown chunk key encoding. Got {name_parsed}, expected one of ('v2', 'default')." 
+ raise ValueError(msg) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.name, "configuration": {"separator": self.separator}} + + @abstractmethod + def decode_chunk_key(self, chunk_key: str) -> ChunkCoords: + pass + + @abstractmethod + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + pass + + +@dataclass(frozen=True) +class DefaultChunkKeyEncoding(ChunkKeyEncoding): + name: Literal["default"] = "default" + + def decode_chunk_key(self, chunk_key: str) -> ChunkCoords: + if chunk_key == "c": + return () + return tuple(map(int, chunk_key[1:].split(self.separator))) + + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + return self.separator.join(map(str, ("c",) + chunk_coords)) + + +@dataclass(frozen=True) +class V2ChunkKeyEncoding(ChunkKeyEncoding): + name: Literal["v2"] = "v2" + + def decode_chunk_key(self, chunk_key: str) -> ChunkCoords: + return tuple(map(int, chunk_key.split(self.separator))) + + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + chunk_identifier = self.separator.join(map(str, chunk_coords)) + return "0" if chunk_identifier == "" else chunk_identifier diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py new file mode 100644 index 0000000000..0bc6245cb5 --- /dev/null +++ b/src/zarr/core/common.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import asyncio +import functools +import operator +from collections.abc import Iterable, Mapping +from enum import Enum +from typing import ( + TYPE_CHECKING, + Any, + Literal, + TypeVar, + cast, + overload, +) + +import numpy as np + +from zarr.core.strings import _STRING_DTYPE + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Iterator + + +ZARR_JSON = "zarr.json" +ZARRAY_JSON = ".zarray" +ZGROUP_JSON = ".zgroup" +ZATTRS_JSON = ".zattrs" +ZMETADATA_V2_JSON = ".zmetadata" + +ByteRangeRequest = tuple[int | None, int | None] +BytesLike = bytes | bytearray | memoryview +ShapeLike = tuple[int, ...] | int +ChunkCoords = tuple[int, ...] +ChunkCoordsLike = Iterable[int] +ZarrFormat = Literal[2, 3] +NodeType = Literal["array", "group"] +JSON = None | str | int | float | Mapping[str, "JSON"] | tuple["JSON", ...] +MemoryOrder = Literal["C", "F"] +AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"] + + +def product(tup: ChunkCoords) -> int: + return functools.reduce(operator.mul, tup, 1) + + +T = TypeVar("T", bound=tuple[Any, ...]) +V = TypeVar("V") + + +async def concurrent_map( + items: Iterable[T], func: Callable[..., Awaitable[V]], limit: int | None = None +) -> list[V]: + if limit is None: + return await asyncio.gather(*[func(*item) for item in items]) + + else: + sem = asyncio.Semaphore(limit) + + async def run(item: tuple[Any]) -> V: + async with sem: + return await func(*item) + + return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items]) + + +E = TypeVar("E", bound=Enum) + + +def enum_names(enum: type[E]) -> Iterator[str]: + for item in enum: + yield item.name + + +def parse_enum(data: object, cls: type[E]) -> E: + if isinstance(data, cls): + return data + if not isinstance(data, str): + raise TypeError(f"Expected str, got {type(data)}") + if data in enum_names(cls): + return cls(data) + raise ValueError(f"Value must be one of {list(enum_names(cls))!r}. Got {data} instead.") + + +def parse_name(data: JSON, expected: str | None = None) -> str: + if isinstance(data, str): + if expected is None or data == expected: + return data + raise ValueError(f"Expected '{expected}'. 
Got {data} instead.") + else: + raise TypeError(f"Expected a string, got an instance of {type(data)}.") + + +def parse_configuration(data: JSON) -> JSON: + if not isinstance(data, dict): + raise TypeError(f"Expected dict, got {type(data)}") + return data + + +@overload +def parse_named_configuration( + data: JSON, expected_name: str | None = None +) -> tuple[str, dict[str, JSON]]: ... + + +@overload +def parse_named_configuration( + data: JSON, expected_name: str | None = None, *, require_configuration: bool = True +) -> tuple[str, dict[str, JSON] | None]: ... + + +def parse_named_configuration( + data: JSON, expected_name: str | None = None, *, require_configuration: bool = True +) -> tuple[str, JSON | None]: + if not isinstance(data, dict): + raise TypeError(f"Expected dict, got {type(data)}") + if "name" not in data: + raise ValueError(f"Named configuration does not have a 'name' key. Got {data}.") + name_parsed = parse_name(data["name"], expected_name) + if "configuration" in data: + configuration_parsed = parse_configuration(data["configuration"]) + elif require_configuration: + raise ValueError(f"Named configuration does not have a 'configuration' key. Got {data}.") + else: + configuration_parsed = None + return name_parsed, configuration_parsed + + +def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]: + if isinstance(data, int): + if data < 0: + raise ValueError(f"Expected a non-negative integer. Got {data} instead") + return (data,) + try: + data_tuple = tuple(data) + except TypeError as e: + msg = f"Expected an integer or an iterable of integers. Got {data} instead." + raise TypeError(msg) from e + + if not all(isinstance(v, int) for v in data_tuple): + msg = f"Expected an iterable of integers. Got {data} instead." + raise TypeError(msg) + if not all(v > -1 for v in data_tuple): + msg = f"Expected all values to be non-negative. Got {data} instead." + raise ValueError(msg) + return data_tuple + + +def parse_fill_value(data: Any) -> Any: + # todo: real validation + return data + + +def parse_order(data: Any) -> Literal["C", "F"]: + if data in ("C", "F"): + return cast(Literal["C", "F"], data) + raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") + + +def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]: + if dtype is str or dtype == "str": + if zarr_format == 2: + # special case as object + return np.dtype("object") + else: + return _STRING_DTYPE + return np.dtype(dtype) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py new file mode 100644 index 0000000000..ec2c8c47a3 --- /dev/null +++ b/src/zarr/core/config.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from typing import Any, Literal, cast + +from donfig import Config as DConfig + + +class BadConfigError(ValueError): + _msg = "bad Config: %r" + + +class Config(DConfig): # type: ignore[misc] + """Will collect configuration from config files and environment variables + + Example environment variables: + Grabs environment variables of the form "ZARR_FOO__BAR_BAZ=123" and + turns these into config variables of the form ``{"foo": {"bar-baz": 123}}`` + It transforms the key and value in the following way: + + - Lower-cases the key text + - Treats ``__`` (double-underscore) as nested access + - Calls ``ast.literal_eval`` on the value + + """ + + def reset(self) -> None: + self.clear() + self.refresh() + + +# The config module is responsible for managing the configuration of zarr and is based on the Donfig python library. 
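+# For example (illustrative), setting ``ZARR_ASYNC__CONCURRENCY=20`` in the environment is
+# read as the config entry ``{"async": {"concurrency": 20}}``, overriding the ``async.concurrency``
+# default defined below.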
+# For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations +# in the registry and then select them in the config. +# e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be +# "NewBytesCodec". +# Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations +# e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec" +# (for more information see github.com/pytroll/donfig) +# Default values below point to the standard implementations of zarr-python +config = Config( + "zarr", + defaults=[ + { + "default_zarr_version": 3, + "array": {"order": "C"}, + "async": {"concurrency": 10, "timeout": None}, + "threading": {"max_workers": None}, + "json_indent": 2, + "codec_pipeline": { + "path": "zarr.codecs.pipeline.BatchedCodecPipeline", + "batch_size": 1, + }, + "codecs": { + "blosc": "zarr.codecs.blosc.BloscCodec", + "gzip": "zarr.codecs.gzip.GzipCodec", + "zstd": "zarr.codecs.zstd.ZstdCodec", + "bytes": "zarr.codecs.bytes.BytesCodec", + "endian": "zarr.codecs.bytes.BytesCodec", # compatibility with earlier versions of ZEP1 + "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", + "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", + "transpose": "zarr.codecs.transpose.TransposeCodec", + "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", + "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", + }, + "buffer": "zarr.core.buffer.cpu.Buffer", + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + } + ], +) + + +def parse_indexing_order(data: Any) -> Literal["C", "F"]: + if data in ("C", "F"): + return cast(Literal["C", "F"], data) + msg = f"Expected one of ('C', 'F'), got {data} instead." + raise ValueError(msg) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py new file mode 100644 index 0000000000..e85057e2f6 --- /dev/null +++ b/src/zarr/core/group.py @@ -0,0 +1,1804 @@ +from __future__ import annotations + +import asyncio +import itertools +import json +import logging +import warnings +from collections import defaultdict +from dataclasses import asdict, dataclass, field, fields, replace +from typing import TYPE_CHECKING, Literal, TypeVar, assert_never, cast, overload + +import numpy as np +import numpy.typing as npt +from typing_extensions import deprecated + +import zarr.api.asynchronous as async_api +from zarr.abc.metadata import Metadata +from zarr.abc.store import Store, set_or_delete +from zarr.core.array import Array, AsyncArray, _build_parents +from zarr.core.attributes import Attributes +from zarr.core.buffer import default_buffer_prototype +from zarr.core.common import ( + JSON, + ZARR_JSON, + ZARRAY_JSON, + ZATTRS_JSON, + ZGROUP_JSON, + ZMETADATA_V2_JSON, + ChunkCoords, + NodeType, + ShapeLike, + ZarrFormat, + parse_shapelike, +) +from zarr.core.config import config +from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata +from zarr.core.metadata.v3 import V3JsonEncoder +from zarr.core.sync import SyncMixin, sync +from zarr.errors import MetadataValidationError +from zarr.storage import StoreLike, make_store_path +from zarr.storage.common import StorePath, ensure_no_existing_node + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Generator, Iterable, Iterator + from typing import Any + + from zarr.abc.codec import Codec + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.chunk_key_encodings import ChunkKeyEncoding + +logger = logging.getLogger("zarr.group") + +DefaultT = 
TypeVar("DefaultT") + + +def parse_zarr_format(data: Any) -> ZarrFormat: + if data in (2, 3): + return cast(Literal[2, 3], data) + msg = f"Invalid zarr_format. Expected one of 2 or 3. Got {data}." + raise ValueError(msg) + + +def parse_node_type(data: Any) -> NodeType: + if data in ("array", "group"): + return cast(Literal["array", "group"], data) + raise MetadataValidationError("node_type", "array or group", data) + + +# todo: convert None to empty dict +def parse_attributes(data: Any) -> dict[str, Any]: + if data is None: + return {} + elif isinstance(data, dict) and all(isinstance(k, str) for k in data): + return data + msg = f"Expected dict with string keys. Got {type(data)} instead." + raise TypeError(msg) + + +@overload +def _parse_async_node(node: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]) -> Array: ... + + +@overload +def _parse_async_node(node: AsyncGroup) -> Group: ... + + +def _parse_async_node( + node: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup, +) -> Array | Group: + """ + Wrap an AsyncArray in an Array, or an AsyncGroup in a Group. + """ + if isinstance(node, AsyncArray): + return Array(node) + elif isinstance(node, AsyncGroup): + return Group(node) + else: + raise TypeError(f"Unknown node type, got {type(node)}") + + +@dataclass(frozen=True) +class ConsolidatedMetadata: + """ + Consolidated Metadata for this Group. + + This stores the metadata of child nodes below this group. Any child groups + will have their consolidated metadata set appropriately. + """ + + metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] + kind: Literal["inline"] = "inline" + must_understand: Literal[False] = False + + def to_dict(self) -> dict[str, JSON]: + return { + "kind": self.kind, + "must_understand": self.must_understand, + "metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()}, + } + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> ConsolidatedMetadata: + data = dict(data) + + kind = data.get("kind") + if kind != "inline": + raise ValueError(f"Consolidated metadata kind='{kind}' is not supported.") + + raw_metadata = data.get("metadata") + if not isinstance(raw_metadata, dict): + raise TypeError(f"Unexpected type for 'metadata': {type(raw_metadata)}") + + metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {} + if raw_metadata: + for k, v in raw_metadata.items(): + if not isinstance(v, dict): + raise TypeError( + f"Invalid value for metadata items. key='{k}', type='{type(v).__name__}'" + ) + + # zarr_format is present in v2 and v3. + zarr_format = parse_zarr_format(v["zarr_format"]) + + if zarr_format == 3: + node_type = parse_node_type(v.get("node_type", None)) + if node_type == "group": + metadata[k] = GroupMetadata.from_dict(v) + elif node_type == "array": + metadata[k] = ArrayV3Metadata.from_dict(v) + else: + assert_never(node_type) + elif zarr_format == 2: + if "shape" in v: + metadata[k] = ArrayV2Metadata.from_dict(v) + else: + metadata[k] = GroupMetadata.from_dict(v) + else: + assert_never(zarr_format) + + cls._flat_to_nested(metadata) + + return cls(metadata=metadata) + + @staticmethod + def _flat_to_nested( + metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata], + ) -> None: + """ + Convert a flat metadata representation to a nested one. + + Notes + ----- + Flat metadata is used when persisting the consolidated metadata. The keys + include the full path, not just the node name. 
The key prefixes can be + used to determine which nodes are children of which other nodes. + + Nested metadata is used in-memory. The outermost level will only have the + *immediate* children of the Group. All nested child groups will be stored + under the consolidated metadata of their immediate parent. + """ + # We have a flat mapping from {k: v} where the keys include the *full* + # path segment: + # { + # "/a/b": { group_metadata }, + # "/a/b/array-0": { array_metadata }, + # "/a/b/array-1": { array_metadata }, + # } + # + # We want to reorganize the metadata such that each Group contains the + # array metadata of its immediate children. + # In the example, the group at `/a/b` will have consolidated metadata + # for its children `array-0` and `array-1`. + # + # metadata = dict(metadata) + + keys = sorted(metadata, key=lambda k: k.count("/")) + grouped = { + k: list(v) for k, v in itertools.groupby(keys, key=lambda k: k.rsplit("/", 1)[0]) + } + + # we go top down and directly manipulate metadata. + for key, children_keys in grouped.items(): + # key is a key like "a", "a/b", "a/b/c" + # The basic idea is to find the immediate parent (so "", "a", or "a/b") + # and update that node's consolidated metadata to include the metadata + # in children_keys + *prefixes, name = key.split("/") + parent = metadata + + while prefixes: + # e.g. a/b/c has a parent "a/b". Walk through to get + # metadata["a"]["b"] + part = prefixes.pop(0) + # we can assume that parent[part] here is a group + # otherwise we wouldn't have a node with this `part` prefix. + # We can also assume that the parent node will have consolidated metadata, + # because we're walking top to bottom. + parent = parent[part].consolidated_metadata.metadata # type: ignore[union-attr] + + node = parent[name] + children_keys = list(children_keys) + + if isinstance(node, ArrayV2Metadata | ArrayV3Metadata): + # These are already present, either thanks to being an array in the + # root, or by being collected as a child in the else clause + continue + children_keys = list(children_keys) + # We pop from metadata, since we're *moving* this under group + children = { + child_key.split("/")[-1]: metadata.pop(child_key) + for child_key in children_keys + if child_key != key + } + parent[name] = replace( + node, consolidated_metadata=ConsolidatedMetadata(metadata=children) + ) + + @property + def flattened_metadata(self) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]: + """ + Return the flattened representation of Consolidated Metadata. + + The returned dictionary will have a key for each child node in the hierarchy + under this group. Under the default (nested) representation available through + ``self.metadata``, the dictionary only contains keys for immediate children. + + The keys of the dictionary will include the full path to a child node from + the current group, where segments are joined by ``/``. + + Examples + -------- + >>> cm = ConsolidatedMetadata( + ... metadata={ + ... "group-0": GroupMetadata( + ... consolidated_metadata=ConsolidatedMetadata( + ... { + ... "group-0-0": GroupMetadata(), + ... } + ... ) + ... ), + ... "group-1": GroupMetadata(), + ... } + ... 
) + {'group-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), + 'group-0/group-0-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), + 'group-1': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group')} + """ + metadata = {} + + def flatten( + key: str, group: GroupMetadata | ArrayV2Metadata | ArrayV3Metadata + ) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]: + children: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {} + if isinstance(group, ArrayV2Metadata | ArrayV3Metadata): + children[key] = group + else: + if group.consolidated_metadata and group.consolidated_metadata.metadata is not None: + children[key] = replace( + group, consolidated_metadata=ConsolidatedMetadata(metadata={}) + ) + for name, val in group.consolidated_metadata.metadata.items(): + full_key = f"{key}/{name}" + if isinstance(val, GroupMetadata): + children.update(flatten(full_key, val)) + else: + children[full_key] = val + else: + children[key] = replace(group, consolidated_metadata=None) + return children + + for k, v in self.metadata.items(): + metadata.update(flatten(k, v)) + + return metadata + + +@dataclass(frozen=True) +class GroupMetadata(Metadata): + attributes: dict[str, Any] = field(default_factory=dict) + zarr_format: ZarrFormat = 3 + consolidated_metadata: ConsolidatedMetadata | None = None + node_type: Literal["group"] = field(default="group", init=False) + + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: + json_indent = config.get("json_indent") + if self.zarr_format == 3: + return { + ZARR_JSON: prototype.buffer.from_bytes( + json.dumps(self.to_dict(), cls=V3JsonEncoder).encode() + ) + } + else: + items = { + ZGROUP_JSON: prototype.buffer.from_bytes( + json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode() + ), + ZATTRS_JSON: prototype.buffer.from_bytes( + json.dumps(self.attributes, indent=json_indent).encode() + ), + } + if self.consolidated_metadata: + d = { + ZGROUP_JSON: {"zarr_format": self.zarr_format}, + ZATTRS_JSON: self.attributes, + } + consolidated_metadata = self.consolidated_metadata.to_dict()["metadata"] + assert isinstance(consolidated_metadata, dict) + for k, v in consolidated_metadata.items(): + attrs = v.pop("attributes", None) + d[f"{k}/{ZATTRS_JSON}"] = attrs + if "shape" in v: + # it's an array + d[f"{k}/{ZARRAY_JSON}"] = v + else: + d[f"{k}/{ZGROUP_JSON}"] = { + "zarr_format": self.zarr_format, + "consolidated_metadata": { + "metadata": {}, + "must_understand": False, + "kind": "inline", + }, + } + + items[ZMETADATA_V2_JSON] = prototype.buffer.from_bytes( + json.dumps( + {"metadata": d, "zarr_consolidated_format": 1}, + cls=V3JsonEncoder, + ).encode() + ) + + return items + + def __init__( + self, + attributes: dict[str, Any] | None = None, + zarr_format: ZarrFormat = 3, + consolidated_metadata: ConsolidatedMetadata | None = None, + ) -> None: + attributes_parsed = parse_attributes(attributes) + zarr_format_parsed = parse_zarr_format(zarr_format) + + object.__setattr__(self, "attributes", attributes_parsed) + object.__setattr__(self, "zarr_format", zarr_format_parsed) + object.__setattr__(self, "consolidated_metadata", consolidated_metadata) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> GroupMetadata: + data = dict(data) + assert data.pop("node_type", None) in ("group", None) + consolidated_metadata = data.pop("consolidated_metadata", None) + if 
consolidated_metadata: + data["consolidated_metadata"] = ConsolidatedMetadata.from_dict(consolidated_metadata) + + zarr_format = data.get("zarr_format") + if zarr_format == 2 or zarr_format is None: + # zarr v2 allowed arbitrary keys here. + # We don't want the GroupMetadata constructor to fail just because someone put an + # extra key in the metadata. + expected = {x.name for x in fields(cls)} + data = {k: v for k, v in data.items() if k in expected} + + return cls(**data) + + def to_dict(self) -> dict[str, Any]: + result = asdict(replace(self, consolidated_metadata=None)) + if self.consolidated_metadata: + result["consolidated_metadata"] = self.consolidated_metadata.to_dict() + return result + + +@dataclass(frozen=True) +class AsyncGroup: + metadata: GroupMetadata + store_path: StorePath + + @classmethod + async def from_store( + cls, + store: StoreLike, + *, + attributes: dict[str, Any] | None = None, + exists_ok: bool = False, + zarr_format: ZarrFormat = 3, + ) -> AsyncGroup: + store_path = await make_store_path(store) + if not exists_ok: + await ensure_no_existing_node(store_path, zarr_format=zarr_format) + attributes = attributes or {} + group = cls( + metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), + store_path=store_path, + ) + await group._save_metadata(ensure_parents=True) + return group + + @classmethod + async def open( + cls, + store: StoreLike, + zarr_format: Literal[2, 3, None] = 3, + use_consolidated: bool | str | None = None, + ) -> AsyncGroup: + """ + Open a new AsyncGroup + + Parameters + ---------- + store: StoreLike + zarr_format: {2, 3}, optional + use_consolidated: bool or str, default None + Whether to use consolidated metadata. + + By default, consolidated metadata is used if it's present in the + store (in the ``zarr.json`` for Zarr v3 and in the ``.zmetadata`` file + for Zarr v2). + + To explicitly require consolidated metadata, set ``use_consolidated=True``, + which will raise an exception if consolidated metadata is not found. + + To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, + which will fall back to using the regular, non consolidated metadata. + + Zarr v2 allowed configuring the key storing the consolidated metadata + (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` + to load consolidated metadata from a non-default key. 
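A hedged sketch of the `use_consolidated` modes documented above, assuming `store` is any StoreLike that already holds a group; the custom v2 key name is invented for illustration:

    async def open_variants(store):
        auto = await AsyncGroup.open(store)                           # consolidated metadata if present
        strict = await AsyncGroup.open(store, use_consolidated=True)  # raise if it is missing
        plain = await AsyncGroup.open(store, use_consolidated=False)  # always read per-node metadata
        legacy = await AsyncGroup.open(                               # non-default consolidated key (v2 only)
            store, zarr_format=2, use_consolidated="custom.zmetadata"
        )
        return auto, strict, plain, legacy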
+ """ + store_path = await make_store_path(store) + + consolidated_key = ZMETADATA_V2_JSON + + if (zarr_format == 2 or zarr_format is None) and isinstance(use_consolidated, str): + consolidated_key = use_consolidated + + if zarr_format == 2: + paths = [store_path / ZGROUP_JSON, store_path / ZATTRS_JSON] + if use_consolidated or use_consolidated is None: + paths.append(store_path / consolidated_key) + + zgroup_bytes, zattrs_bytes, *rest = await asyncio.gather( + *[path.get() for path in paths] + ) + if zgroup_bytes is None: + raise FileNotFoundError(store_path) + + if use_consolidated or use_consolidated is None: + maybe_consolidated_metadata_bytes = rest[0] + + else: + maybe_consolidated_metadata_bytes = None + + elif zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format is None: + ( + zarr_json_bytes, + zgroup_bytes, + zattrs_bytes, + maybe_consolidated_metadata_bytes, + ) = await asyncio.gather( + (store_path / ZARR_JSON).get(), + (store_path / ZGROUP_JSON).get(), + (store_path / ZATTRS_JSON).get(), + (store_path / str(consolidated_key)).get(), + ) + if zarr_json_bytes is not None and zgroup_bytes is not None: + # TODO: revisit this exception type + # alternatively, we could warn and favor v3 + raise ValueError("Both zarr.json and .zgroup objects exist") + if zarr_json_bytes is None and zgroup_bytes is None: + raise FileNotFoundError( + f"could not find zarr.json or .zgroup objects in {store_path}" + ) + # set zarr_format based on which keys were found + if zarr_json_bytes is not None: + zarr_format = 3 + else: + zarr_format = 2 + else: + raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format) + + if zarr_format == 2: + # this is checked above, asserting here for mypy + assert zgroup_bytes is not None + + if use_consolidated and maybe_consolidated_metadata_bytes is None: + # the user requested consolidated metadata, but it was missing + raise ValueError(consolidated_key) + + elif use_consolidated is False: + # the user explicitly opted out of consolidated_metadata. + # Discard anything we might have read. + maybe_consolidated_metadata_bytes = None + + return cls._from_bytes_v2( + store_path, zgroup_bytes, zattrs_bytes, maybe_consolidated_metadata_bytes + ) + else: + # V3 groups are comprised of a zarr.json object + assert zarr_json_bytes is not None + if not isinstance(use_consolidated, bool | None): + raise TypeError("use_consolidated must be a bool or None for Zarr V3.") + + return cls._from_bytes_v3( + store_path, + zarr_json_bytes, + use_consolidated=use_consolidated, + ) + + @classmethod + def _from_bytes_v2( + cls, + store_path: StorePath, + zgroup_bytes: Buffer, + zattrs_bytes: Buffer | None, + consolidated_metadata_bytes: Buffer | None, + ) -> AsyncGroup: + # V2 groups are comprised of a .zgroup and .zattrs objects + zgroup = json.loads(zgroup_bytes.to_bytes()) + zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + group_metadata = {**zgroup, "attributes": zattrs} + + if consolidated_metadata_bytes is not None: + v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes()) + v2_consolidated_metadata = v2_consolidated_metadata["metadata"] + # We already read zattrs and zgroup. Should we ignore these? 
+ v2_consolidated_metadata.pop(".zattrs") + v2_consolidated_metadata.pop(".zgroup") + + consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict) + + # keys like air/.zarray, air/.zattrs + for k, v in v2_consolidated_metadata.items(): + path, kind = k.rsplit("/.", 1) + + if kind == "zarray": + consolidated_metadata[path].update(v) + elif kind == "zattrs": + consolidated_metadata[path]["attributes"] = v + elif kind == "zgroup": + consolidated_metadata[path].update(v) + else: + raise ValueError(f"Invalid file type '{kind}' at path '{path}") + group_metadata["consolidated_metadata"] = { + "metadata": dict(consolidated_metadata), + "kind": "inline", + "must_understand": False, + } + + return cls.from_dict(store_path, group_metadata) + + @classmethod + def _from_bytes_v3( + cls, store_path: StorePath, zarr_json_bytes: Buffer, use_consolidated: bool | None + ) -> AsyncGroup: + group_metadata = json.loads(zarr_json_bytes.to_bytes()) + if use_consolidated and group_metadata.get("consolidated_metadata") is None: + msg = f"Consolidated metadata requested with 'use_consolidated=True' but not found in '{store_path.path}'." + raise ValueError(msg) + + elif use_consolidated is False: + # Drop consolidated metadata if it's there. + group_metadata.pop("consolidated_metadata", None) + + return cls.from_dict(store_path, group_metadata) + + @classmethod + def from_dict( + cls, + store_path: StorePath, + data: dict[str, Any], + ) -> AsyncGroup: + return cls( + metadata=GroupMetadata.from_dict(data), + store_path=store_path, + ) + + async def getitem( + self, + key: str, + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup: + store_path = self.store_path / key + logger.debug("key=%s, store_path=%s", key, store_path) + + # Consolidated metadata lets us avoid some I/O operations so try that first. + if self.metadata.consolidated_metadata is not None: + return self._getitem_consolidated(store_path, key, prefix=self.name) + + # Note: + # in zarr-python v2, we first check if `key` references an Array, else if `key` references + # a group,using standalone `contains_array` and `contains_group` functions. These functions + # are reusable, but for v3 they would perform redundant I/O operations. + # Not clear how much of that strategy we want to keep here. + elif self.metadata.zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise KeyError(key) + else: + zarr_json = json.loads(zarr_json_bytes.to_bytes()) + if zarr_json["node_type"] == "group": + return type(self).from_dict(store_path, zarr_json) + elif zarr_json["node_type"] == "array": + return AsyncArray.from_dict(store_path, zarr_json) + else: + raise ValueError(f"unexpected node_type: {zarr_json['node_type']}") + elif self.metadata.zarr_format == 2: + # Q: how do we like optimistically fetching .zgroup, .zarray, and .zattrs? 
+ # This guarantees that we will always make at least one extra request to the store + zgroup_bytes, zarray_bytes, zattrs_bytes = await asyncio.gather( + (store_path / ZGROUP_JSON).get(), + (store_path / ZARRAY_JSON).get(), + (store_path / ZATTRS_JSON).get(), + ) + + if zgroup_bytes is None and zarray_bytes is None: + raise KeyError(key) + + # unpack the zarray, if this is None then we must be opening a group + zarray = json.loads(zarray_bytes.to_bytes()) if zarray_bytes else None + # unpack the zattrs, this can be None if no attrs were written + zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + + if zarray is not None: + # TODO: update this once the V2 array support is part of the primary array class + zarr_json = {**zarray, "attributes": zattrs} + return AsyncArray.from_dict(store_path, zarr_json) + else: + zgroup = ( + json.loads(zgroup_bytes.to_bytes()) + if zgroup_bytes is not None + else {"zarr_format": self.metadata.zarr_format} + ) + zarr_json = {**zgroup, "attributes": zattrs} + return type(self).from_dict(store_path, zarr_json) + else: + raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}") + + def _getitem_consolidated( + self, store_path: StorePath, key: str, prefix: str + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup: + # getitem, in the special case where we have consolidated metadata. + # Note that this is a regular def (non async) function. + # This shouldn't do any additional I/O. + + # the caller needs to verify this! + assert self.metadata.consolidated_metadata is not None + + try: + metadata = self.metadata.consolidated_metadata.metadata[key] + except KeyError as e: + # The Group Metadata has consolidated metadata, but the key + # isn't present. We trust this to mean that the key isn't in + # the hierarchy, and *don't* fall back to checking the store. + msg = f"'{key}' not found in consolidated metadata." + raise KeyError(msg) from e + + # update store_path to ensure that AsyncArray/Group.name is correct + if prefix != "/": + key = "/".join([prefix.lstrip("/"), key]) + store_path = StorePath(store=store_path.store, path=key) + + if isinstance(metadata, GroupMetadata): + return AsyncGroup(metadata=metadata, store_path=store_path) + else: + return AsyncArray(metadata=metadata, store_path=store_path) + + async def delitem(self, key: str) -> None: + store_path = self.store_path / key + if self.metadata.zarr_format == 3: + await (store_path / ZARR_JSON).delete() + + elif self.metadata.zarr_format == 2: + await asyncio.gather( + (store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False + (store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False + (store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True + ) + + else: + raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}") + + if self.metadata.consolidated_metadata: + self.metadata.consolidated_metadata.metadata.pop(key, None) + await self._save_metadata() + + async def get( + self, key: str, default: DefaultT | None = None + ) -> AsyncArray[Any] | AsyncGroup | DefaultT | None: + """Obtain a group member, returning default if not found. + + Parameters + ---------- + key : str + Group member name. + default : object + Default value to return if key is not found (default: None). + + Returns + ------- + object + Group member (AsyncArray or AsyncGroup) or default if not found. 
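A minimal sketch of the dict-like `get` semantics described above; the member name is hypothetical:

    async def fetch_or_create(group: AsyncGroup):
        node = await group.get("pressure")             # AsyncArray or AsyncGroup when present
        if node is None:                               # default (None) when the key is absent
            node = await group.create_group("pressure")
        return node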
+ """ + try: + return await self.getitem(key) + except KeyError: + return default + + async def _save_metadata(self, ensure_parents: bool = False) -> None: + to_save = self.metadata.to_buffer_dict(default_buffer_prototype()) + awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()] + + if ensure_parents: + parents = _build_parents(self) + for parent in parents: + awaitables.extend( + [ + (parent.store_path / key).set_if_not_exists(value) + for key, value in parent.metadata.to_buffer_dict( + default_buffer_prototype() + ).items() + ] + ) + + await asyncio.gather(*awaitables) + + @property + def path(self) -> str: + """Storage path.""" + return self.store_path.path + + @property + def name(self) -> str: + """Group name following h5py convention.""" + if self.path: + # follow h5py convention: add leading slash + name = self.path + if name[0] != "/": + name = "/" + name + return name + return "/" + + @property + def basename(self) -> str: + """Final component of name.""" + return self.name.split("/")[-1] + + @property + def attrs(self) -> dict[str, Any]: + return self.metadata.attributes + + @property + def info(self) -> None: + raise NotImplementedError + + @property + def store(self) -> Store: + return self.store_path.store + + @property + def read_only(self) -> bool: + # Backwards compatibility for 2.x + return self.store_path.store.mode.readonly + + @property + def synchronizer(self) -> None: + # Backwards compatibility for 2.x + # Not implemented in 3.x yet. + return None + + async def create_group( + self, + name: str, + *, + exists_ok: bool = False, + attributes: dict[str, Any] | None = None, + ) -> AsyncGroup: + attributes = attributes or {} + return await type(self).from_store( + self.store_path / name, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=self.metadata.zarr_format, + ) + + async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup: + """Obtain a sub-group, creating one if it doesn't exist. + + Parameters + ---------- + name : str + Group name. + overwrite : bool, optional + Overwrite any existing group with given `name` if present. 
+ + Returns + ------- + g : AsyncGroup + """ + if overwrite: + # TODO: check that exists_ok=True errors if an array exists where the group is being created + grp = await self.create_group(name, exists_ok=True) + else: + try: + item: ( + AsyncGroup | AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] + ) = await self.getitem(name) + if not isinstance(item, AsyncGroup): + raise TypeError( + f"Incompatible object ({item.__class__.__name__}) already exists" + ) + assert isinstance(item, AsyncGroup) # make mypy happy + grp = item + except KeyError: + grp = await self.create_group(name) + return grp + + async def require_groups(self, *names: str) -> tuple[AsyncGroup, ...]: + """Convenience method to require multiple groups in a single call.""" + if not names: + return () + return tuple(await asyncio.gather(*(self.require_group(name) for name in names))) + + async def create_array( + self, + name: str, + *, + shape: ShapeLike, + dtype: npt.DTypeLike = "float64", + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ShapeLike | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """ + Create a Zarr array within this AsyncGroup. + This method lightly wraps AsyncArray.create. + + Parameters + ---------- + name: str + The name of the array. + shape: tuple[int, ...] + The shape of the array. + dtype: np.DtypeLike = float64 + The data type of the array. + chunk_shape: tuple[int, ...] | None = None + The shape of the chunks of the array. V3 only. + chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + A specification of how the chunk keys are represented in storage. + codecs: Iterable[Codec | dict[str, JSON]] | None = None + An iterable of Codec or dict serializations thereof. The elements of + this collection specify the transformation from array values to stored bytes. + dimension_names: Iterable[str] | None = None + The names of the dimensions of the array. V3 only. + chunks: ChunkCoords | None = None + The shape of the chunks of the array. V2 only. + dimension_separator: Literal[".", "/"] | None = None + The delimiter used for the chunk keys. + order: Literal["C", "F"] | None = None + The memory order of the array. + filters: list[dict[str, JSON]] | None = None + Filters for the array. + compressor: dict[str, JSON] | None = None + The compressor for the array. + exists_ok: bool = False + If True, a pre-existing array or group at the path of this array will + be overwritten. If False, the presence of a pre-existing array or group is + an error. 
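A hedged usage sketch for the `create_array` signature documented above; the array name, shape, chunking, and attributes are invented for illustration:

    async def make_array(group: AsyncGroup):
        return await group.create_array(
            "temperature",
            shape=(365, 720, 1440),
            dtype="float32",
            chunk_shape=(1, 720, 1440),    # Zarr v3 chunk grid
            fill_value=float("nan"),
            attributes={"units": "K"},
        )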
+ + Returns + ------- + AsyncArray + + """ + return await AsyncArray.create( + self.store_path / name, + shape=shape, + dtype=dtype, + chunk_shape=chunk_shape, + fill_value=fill_value, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + attributes=attributes, + chunks=chunks, + dimension_separator=dimension_separator, + order=order, + filters=filters, + compressor=compressor, + exists_ok=exists_ok, + zarr_format=self.metadata.zarr_format, + data=data, + ) + + @deprecated("Use AsyncGroup.create_array instead.") + async def create_dataset( + self, name: str, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Create an array. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.require_dataset` method. + + Parameters + ---------- + name : str + Array name. + kwargs : dict + Additional arguments passed to :func:`zarr.AsyncGroup.create_array`. + + Returns + ------- + a : AsyncArray + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead. + """ + return await self.create_array(name, **kwargs) + + @deprecated("Use AsyncGroup.require_array instead.") + async def require_dataset( + self, + name: str, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike = None, + exact: bool = False, + **kwargs: Any, + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Obtain an array, creating if it doesn't exist. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the :func:`zarr.AsyncGroup.create_dataset` method. + + Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + + Parameters + ---------- + name : str + Array name. + shape : int or tuple of ints + Array shape. + dtype : str or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + + Returns + ------- + a : AsyncArray + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.require_dataset` instead. + """ + return await self.require_array(name, shape=shape, dtype=dtype, exact=exact, **kwargs) + + async def require_array( + self, + name: str, + *, + shape: ShapeLike, + dtype: npt.DTypeLike = None, + exact: bool = False, + **kwargs: Any, + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + """Obtain an array, creating if it doesn't exist. + + Other `kwargs` are as per :func:`zarr.AsyncGroup.create_dataset`. + + Parameters + ---------- + name : str + Array name. + shape : int or tuple of ints + Array shape. + dtype : str or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. 
+ + Returns + ------- + a : AsyncArray + """ + try: + ds = await self.getitem(name) + if not isinstance(ds, AsyncArray): + raise TypeError(f"Incompatible object ({ds.__class__.__name__}) already exists") + + shape = parse_shapelike(shape) + if shape != ds.shape: + raise TypeError(f"Incompatible shape ({ds.shape} vs {shape})") + + dtype = np.dtype(dtype) + if exact: + if ds.dtype != dtype: + raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") + else: + if not np.can_cast(ds.dtype, dtype): + raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") + except KeyError: + ds = await self.create_array(name, shape=shape, dtype=dtype, **kwargs) + + return ds + + async def update_attributes(self, new_attributes: dict[str, Any]) -> AsyncGroup: + # metadata.attributes is "frozen" so we simply clear and update the dict + self.metadata.attributes.clear() + self.metadata.attributes.update(new_attributes) + + # Write new metadata + await self._save_metadata() + + return self + + def __repr__(self) -> str: + return f"" + + async def nmembers( + self, + max_depth: int | None = 0, + ) -> int: + """ + Count the number of members in this group. + + Parameters + ---------- + max_depth : int, default 0 + The maximum number of levels of the hierarchy to include. By + default, (``max_depth=0``) only immediate children are included. Set + ``max_depth=None`` to include all nodes, and some positive integer + to consider children within that many levels of the root Group. + + Returns + ------- + count : int + """ + if self.metadata.consolidated_metadata is not None: + return len(self.metadata.consolidated_metadata.flattened_metadata) + # TODO: consider using aioitertools.builtins.sum for this + # return await aioitertools.builtins.sum((1 async for _ in self.members()), start=0) + n = 0 + async for _ in self.members(max_depth=max_depth): + n += 1 + return n + + async def members( + self, + max_depth: int | None = 0, + ) -> AsyncGenerator[ + tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None + ]: + """ + Returns an AsyncGenerator over the arrays and groups contained in this group. + This method requires that `store_path.store` supports directory listing. + + The results are not guaranteed to be ordered. + + Parameters + ---------- + max_depth : int, default 0 + The maximum number of levels of the hierarchy to include. By + default, (``max_depth=0``) only immediate children are included. Set + ``max_depth=None`` to include all nodes, and some positive integer + to consider children within that many levels of the root Group. + + Returns + ------- + path: + A string giving the path to the target, relative to the Group ``self``. + value: AsyncArray or AsyncGroup + The AsyncArray or AsyncGroup that is a child of ``self``. + """ + if max_depth is not None and max_depth < 0: + raise ValueError(f"max_depth must be None or >= 0. 
Got '{max_depth}' instead") + async for item in self._members(max_depth=max_depth, current_depth=0): + yield item + + async def _members( + self, max_depth: int | None, current_depth: int + ) -> AsyncGenerator[ + tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None + ]: + if self.metadata.consolidated_metadata is not None: + # we should be able to do members without any additional I/O + members = self._members_consolidated(max_depth, current_depth) + + for member in members: + yield member + return + + if not self.store_path.store.supports_listing: + msg = ( + f"The store associated with this group ({type(self.store_path.store)}) " + "does not support listing, " + "specifically via the `list_dir` method. " + "This function requires a store that supports listing." + ) + + raise ValueError(msg) + # would be nice to make these special keys accessible programmatically, + # and scoped to specific zarr versions + # especially true for `.zmetadata` which is configurable + _skip_keys = ("zarr.json", ".zgroup", ".zattrs", ".zmetadata") + + # hmm lots of I/O and logic interleaved here. + # We *could* have an async gen over self.metadata.consolidated_metadata.metadata.keys() + # and plug in here. `getitem` will skip I/O. + # Kinda a shame to have all the asyncio task overhead though, when it isn't needed. + + async for key in self.store_path.store.list_dir(self.store_path.path): + if key in _skip_keys: + continue + try: + obj = await self.getitem(key) + yield (key, obj) + + if ( + ((max_depth is None) or (current_depth < max_depth)) + and hasattr(obj.metadata, "node_type") + and obj.metadata.node_type == "group" + ): + # the assert is just for mypy to know that `obj.metadata.node_type` + # implies an AsyncGroup, not an AsyncArray + assert isinstance(obj, AsyncGroup) + async for child_key, val in obj._members( + max_depth=max_depth, current_depth=current_depth + 1 + ): + yield f"{key}/{child_key}", val + except KeyError: + # keyerror is raised when `key` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group + # in which case `key` cannot be the name of a sub-array or sub-group. + warnings.warn( + f"Object at {key} is not recognized as a component of a Zarr hierarchy.", + UserWarning, + stacklevel=1, + ) + + def _members_consolidated( + self, max_depth: int | None, current_depth: int, prefix: str = "" + ) -> Generator[ + tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None + ]: + consolidated_metadata = self.metadata.consolidated_metadata + + # we kind of just want the top-level keys. + if consolidated_metadata is not None: + for key in consolidated_metadata.metadata.keys(): + obj = self._getitem_consolidated( + self.store_path, key, prefix=self.name + ) # Metadata -> Group/Array + key = f"{prefix}/{key}".lstrip("/") + yield key, obj + + if ((max_depth is None) or (current_depth < max_depth)) and isinstance( + obj, AsyncGroup + ): + yield from obj._members_consolidated(max_depth, current_depth + 1, prefix=key) + + async def keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.members(): + yield key + + async def contains(self, member: str) -> bool: + # TODO: this can be made more efficient. 
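A hedged sketch combining the `members` and `nmembers` traversal methods defined above; the printing is purely illustrative:

    async def walk(group: AsyncGroup) -> None:
        async for path, node in group.members(max_depth=None):   # recurse through every level
            print(path, type(node).__name__)
        print(await group.nmembers())                            # immediate children only (max_depth=0)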
+ try: + await self.getitem(member) + except KeyError: + return False + else: + return True + + async def groups(self) -> AsyncGenerator[tuple[str, AsyncGroup], None]: + async for name, value in self.members(): + if isinstance(value, AsyncGroup): + yield name, value + + async def group_keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.groups(): + yield key + + async def group_values(self) -> AsyncGenerator[AsyncGroup, None]: + async for _, group in self.groups(): + yield group + + async def arrays( + self, + ) -> AsyncGenerator[ + tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]], None + ]: + async for key, value in self.members(): + if isinstance(value, AsyncArray): + yield key, value + + async def array_keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.arrays(): + yield key + + async def array_values( + self, + ) -> AsyncGenerator[AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], None]: + async for _, array in self.arrays(): + yield array + + async def tree(self, expand: bool = False, level: int | None = None) -> Any: + raise NotImplementedError + + async def empty( + self, *, name: str, shape: ChunkCoords, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.empty(shape=shape, store=self.store_path, path=name, **kwargs) + + async def zeros( + self, *, name: str, shape: ChunkCoords, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.zeros(shape=shape, store=self.store_path, path=name, **kwargs) + + async def ones( + self, *, name: str, shape: ChunkCoords, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.ones(shape=shape, store=self.store_path, path=name, **kwargs) + + async def full( + self, *, name: str, shape: ChunkCoords, fill_value: Any | None, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.full( + shape=shape, fill_value=fill_value, store=self.store_path, path=name, **kwargs + ) + + async def empty_like( + self, *, name: str, data: async_api.ArrayLike, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.empty_like(a=data, store=self.store_path, path=name, **kwargs) + + async def zeros_like( + self, *, name: str, data: async_api.ArrayLike, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.zeros_like(a=data, store=self.store_path, path=name, **kwargs) + + async def ones_like( + self, *, name: str, data: async_api.ArrayLike, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.ones_like(a=data, store=self.store_path, path=name, **kwargs) + + async def full_like( + self, *, name: str, data: async_api.ArrayLike, **kwargs: Any + ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: + return await async_api.full_like(a=data, store=self.store_path, path=name, **kwargs) + + async def move(self, source: str, dest: str) -> None: + raise NotImplementedError + + +@dataclass(frozen=True) +class Group(SyncMixin): + _async_group: AsyncGroup + + @classmethod + def from_store( + cls, + store: StoreLike, + *, + attributes: dict[str, Any] | None = None, + zarr_format: ZarrFormat = 3, + exists_ok: bool = False, + ) -> Group: + attributes = attributes or {} + obj = sync( + AsyncGroup.from_store( + store, + attributes=attributes, + exists_ok=exists_ok, + 
zarr_format=zarr_format, + ), + ) + + return cls(obj) + + @classmethod + def open( + cls, + store: StoreLike, + zarr_format: Literal[2, 3, None] = 3, + ) -> Group: + obj = sync(AsyncGroup.open(store, zarr_format=zarr_format)) + return cls(obj) + + def __getitem__(self, path: str) -> Array | Group: + obj = self._sync(self._async_group.getitem(path)) + if isinstance(obj, AsyncArray): + return Array(obj) + else: + return Group(obj) + + def get(self, path: str, default: DefaultT | None = None) -> Array | Group | DefaultT | None: + """Obtain a group member, returning default if not found. + + Parameters + ---------- + key : str + Group member name. + default : object + Default value to return if key is not found (default: None). + + Returns + ------- + object + Group member (Array or Group) or default if not found. + """ + try: + return self[path] + except KeyError: + return default + + def __delitem__(self, key: str) -> None: + self._sync(self._async_group.delitem(key)) + + def __iter__(self) -> Iterator[str]: + yield from self.keys() + + def __len__(self) -> int: + return self.nmembers() + + def __setitem__(self, key: str, value: Any) -> None: + """__setitem__ is not supported in v3""" + raise NotImplementedError + + def __repr__(self) -> str: + return f"" + + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: + new_metadata = replace(self.metadata, attributes=new_attributes) + + # Write new metadata + to_save = new_metadata.to_buffer_dict(default_buffer_prototype()) + awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()] + await asyncio.gather(*awaitables) + + async_group = replace(self._async_group, metadata=new_metadata) + return replace(self, _async_group=async_group) + + @property + def store_path(self) -> StorePath: + return self._async_group.store_path + + @property + def metadata(self) -> GroupMetadata: + return self._async_group.metadata + + @property + def path(self) -> str: + """Storage path.""" + return self._async_group.path + + @property + def name(self) -> str: + """Group name following h5py convention.""" + return self._async_group.name + + @property + def basename(self) -> str: + """Final component of name.""" + return self._async_group.basename + + @property + def attrs(self) -> Attributes: + return Attributes(self) + + @property + def info(self) -> None: + raise NotImplementedError + + @property + def store(self) -> Store: + # Backwards compatibility for 2.x + return self._async_group.store + + @property + def read_only(self) -> bool: + # Backwards compatibility for 2.x + return self._async_group.read_only + + @property + def synchronizer(self) -> None: + # Backwards compatibility for 2.x + # Not implemented in 3.x yet. 
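A hedged sketch of the synchronous wrapper's dict-like interface shown above, assuming `store` is any writable StoreLike; group and attribute names are invented:

    def sync_usage(store):
        root = Group.from_store(store, attributes={"title": "demo"})
        root.create_group("measurements")
        print(list(root.keys()), len(root), "measurements" in root)
        return root["measurements"]        # __getitem__ wraps the async node in Array or Group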
+ return self._async_group.synchronizer + + def update_attributes(self, new_attributes: dict[str, Any]) -> Group: + self._sync(self._async_group.update_attributes(new_attributes)) + return self + + def nmembers(self, max_depth: int | None = 0) -> int: + return self._sync(self._async_group.nmembers(max_depth=max_depth)) + + def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group], ...]: + """ + Return the sub-arrays and sub-groups of this group as a tuple of (name, array | group) + pairs + """ + _members = self._sync_iter(self._async_group.members(max_depth=max_depth)) + + return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) + + def keys(self) -> Generator[str, None]: + yield from self._sync_iter(self._async_group.keys()) + + def __contains__(self, member: str) -> bool: + return self._sync(self._async_group.contains(member)) + + def groups(self) -> Generator[tuple[str, Group], None]: + for name, async_group in self._sync_iter(self._async_group.groups()): + yield name, Group(async_group) + + def group_keys(self) -> Generator[str, None]: + for name, _ in self.groups(): + yield name + + def group_values(self) -> Generator[Group, None]: + for _, group in self.groups(): + yield group + + def arrays(self) -> Generator[tuple[str, Array], None]: + for name, async_array in self._sync_iter(self._async_group.arrays()): + yield name, Array(async_array) + + def array_keys(self) -> Generator[str, None]: + for name, _ in self.arrays(): + yield name + + def array_values(self) -> Generator[Array, None]: + for _, array in self.arrays(): + yield array + + def tree(self, expand: bool = False, level: int | None = None) -> Any: + return self._sync(self._async_group.tree(expand=expand, level=level)) + + def create_group(self, name: str, **kwargs: Any) -> Group: + return Group(self._sync(self._async_group.create_group(name, **kwargs))) + + def require_group(self, name: str, **kwargs: Any) -> Group: + """Obtain a sub-group, creating one if it doesn't exist. + + Parameters + ---------- + name : str + Group name. + overwrite : bool, optional + Overwrite any existing group with given `name` if present. + + Returns + ------- + g : Group + """ + return Group(self._sync(self._async_group.require_group(name, **kwargs))) + + def require_groups(self, *names: str) -> tuple[Group, ...]: + """Convenience method to require multiple groups in a single call.""" + return tuple(map(Group, self._sync(self._async_group.require_groups(*names)))) + + def create(self, *args: Any, **kwargs: Any) -> Array: + # Backwards compatibility for 2.x + return self.create_array(*args, **kwargs) + + def create_array( + self, + name: str, + *, + shape: ShapeLike, + dtype: npt.DTypeLike = "float64", + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ShapeLike | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> Array: + """ + Create a zarr array within this AsyncGroup. + This method lightly wraps AsyncArray.create. 
+ + Parameters + ---------- + name: str + The name of the array. + shape: tuple[int, ...] + The shape of the array. + dtype: np.DtypeLike = float64 + The data type of the array. + chunk_shape: tuple[int, ...] | None = None + The shape of the chunks of the array. V3 only. + chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + A specification of how the chunk keys are represented in storage. + codecs: Iterable[Codec | dict[str, JSON]] | None = None + An iterable of Codec or dict serializations thereof. The elements of this collection + specify the transformation from array values to stored bytes. + dimension_names: Iterable[str] | None = None + The names of the dimensions of the array. V3 only. + chunks: ChunkCoords | None = None + The shape of the chunks of the array. V2 only. + dimension_separator: Literal[".", "/"] | None = None + The delimiter used for the chunk keys. + order: Literal["C", "F"] | None = None + The memory order of the array. + filters: list[dict[str, JSON]] | None = None + Filters for the array. + compressor: dict[str, JSON] | None = None + The compressor for the array. + exists_ok: bool = False + If True, a pre-existing array or group at the path of this array will + be overwritten. If False, the presence of a pre-existing array or group is + an error. + data: npt.ArrayLike | None = None + Array data to initialize the array with. + + Returns + ------- + Array + + """ + return Array( + self._sync( + self._async_group.create_array( + name=name, + shape=shape, + dtype=dtype, + fill_value=fill_value, + attributes=attributes, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + chunks=chunks, + dimension_separator=dimension_separator, + order=order, + filters=filters, + compressor=compressor, + exists_ok=exists_ok, + data=data, + ) + ) + ) + + @deprecated("Use Group.create_array instead.") + def create_dataset(self, name: str, **kwargs: Any) -> Array: + """Create an array. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the :func:`zarr.Group.require_dataset` method. + + Parameters + ---------- + name : str + Array name. + kwargs : dict + Additional arguments passed to :func:`zarr.Group.create_array` + + Returns + ------- + a : Array + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.create_array` instead. + """ + return Array(self._sync(self._async_group.create_dataset(name, **kwargs))) + + @deprecated("Use Group.require_array instead.") + def require_dataset(self, name: str, **kwargs: Any) -> Array: + """Obtain an array, creating if it doesn't exist. + + Arrays are known as "datasets" in HDF5 terminology. For compatibility + with h5py, Zarr groups also implement the :func:`zarr.Group.create_dataset` method. + + Other `kwargs` are as per :func:`zarr.Group.create_dataset`. + + Parameters + ---------- + name : str + Array name. + shape : int or tuple of ints + Array shape. + dtype : str or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + + Returns + ------- + a : Array + + .. deprecated:: 3.0.0 + The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. 
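A hedged before/after sketch for the deprecation notes above; array names and dtypes are illustrative:

    def migrate(group: Group):
        # deprecated h5py-style spelling:
        #   a = group.create_dataset("a", shape=(10,), dtype="int32")
        # preferred replacements:
        a = group.create_array("a", shape=(10,), dtype="int32")
        b = group.require_array("b", shape=(10,), dtype="int32")   # create if absent, validate otherwise
        return a, b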
+ """ + return Array(self._sync(self._async_group.require_array(name, **kwargs))) + + def require_array(self, name: str, **kwargs: Any) -> Array: + """Obtain an array, creating if it doesn't exist. + + + Other `kwargs` are as per :func:`zarr.Group.create_array`. + + Parameters + ---------- + name : str + Array name. + shape : int or tuple of ints + Array shape. + dtype : str or dtype, optional + NumPy dtype. + exact : bool, optional + If True, require `dtype` to match exactly. If false, require + `dtype` can be cast from array dtype. + + Returns + ------- + a : Array + """ + return Array(self._sync(self._async_group.require_array(name, **kwargs))) + + def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs))) + + def zeros(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.zeros(name=name, shape=shape, **kwargs))) + + def ones(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.ones(name=name, shape=shape, **kwargs))) + + def full( + self, *, name: str, shape: ChunkCoords, fill_value: Any | None, **kwargs: Any + ) -> Array: + return Array( + self._sync( + self._async_group.full(name=name, shape=shape, fill_value=fill_value, **kwargs) + ) + ) + + def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.empty_like(name=name, data=data, **kwargs))) + + def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.zeros_like(name=name, data=data, **kwargs))) + + def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.ones_like(name=name, data=data, **kwargs))) + + def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(self._sync(self._async_group.full_like(name=name, data=data, **kwargs))) + + def move(self, source: str, dest: str) -> None: + return self._sync(self._async_group.move(source, dest)) + + @deprecated("Use Group.create_array instead.") + def array( + self, + name: str, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike = "float64", + fill_value: Any | None = None, + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + # v2 only + chunks: ChunkCoords | None = None, + dimension_separator: Literal[".", "/"] | None = None, + order: Literal["C", "F"] | None = None, + filters: list[dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | None = None, + # runtime + exists_ok: bool = False, + data: npt.ArrayLike | None = None, + ) -> Array: + """ + Create a zarr array within this AsyncGroup. + This method lightly wraps `AsyncArray.create`. + + Parameters + ---------- + name: str + The name of the array. + shape: tuple[int, ...] + The shape of the array. + dtype: np.DtypeLike = float64 + The data type of the array. + chunk_shape: tuple[int, ...] | None = None + The shape of the chunks of the array. V3 only. 
+ chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + A specification of how the chunk keys are represented in storage. + codecs: Iterable[Codec | dict[str, JSON]] | None = None + An iterable of Codec or dict serializations thereof. The elements of + this collection specify the transformation from array values to stored bytes. + dimension_names: Iterable[str] | None = None + The names of the dimensions of the array. V3 only. + chunks: ChunkCoords | None = None + The shape of the chunks of the array. V2 only. + dimension_separator: Literal[".", "/"] | None = None + The delimiter used for the chunk keys. + order: Literal["C", "F"] | None = None + The memory order of the array. + filters: list[dict[str, JSON]] | None = None + Filters for the array. + compressor: dict[str, JSON] | None = None + The compressor for the array. + exists_ok: bool = False + If True, a pre-existing array or group at the path of this array will + be overwritten. If False, the presence of a pre-existing array or group is + an error. + data: npt.ArrayLike | None = None + Array data to initialize the array with. + + Returns + ------- + + Array + + """ + return Array( + self._sync( + self._async_group.create_array( + name=name, + shape=shape, + dtype=dtype, + fill_value=fill_value, + attributes=attributes, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + chunks=chunks, + dimension_separator=dimension_separator, + order=order, + filters=filters, + compressor=compressor, + exists_ok=exists_ok, + data=data, + ) + ) + ) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py new file mode 100644 index 0000000000..d2e29b3b55 --- /dev/null +++ b/src/zarr/core/indexing.py @@ -0,0 +1,1393 @@ +from __future__ import annotations + +import itertools +import math +import numbers +import operator +from collections.abc import Iterator, Sequence +from dataclasses import dataclass +from enum import Enum +from functools import reduce +from types import EllipsisType +from typing import ( + TYPE_CHECKING, + Any, + Literal, + NamedTuple, + Protocol, + TypeAlias, + TypeGuard, + TypeVar, + cast, + runtime_checkable, +) + +import numpy as np +import numpy.typing as npt + +from zarr.core.common import product + +if TYPE_CHECKING: + from zarr.core.array import Array + from zarr.core.buffer import NDArrayLike + from zarr.core.chunk_grids import ChunkGrid + from zarr.core.common import ChunkCoords + +IntSequence = list[int] | npt.NDArray[np.intp] +ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] +BasicSelector = int | slice | EllipsisType +Selector = BasicSelector | ArrayOfIntOrBool +BasicSelection = BasicSelector | tuple[BasicSelector, ...] # also used for BlockIndex +CoordinateSelection = IntSequence | tuple[IntSequence, ...] +MaskSelection = npt.NDArray[np.bool_] +OrthogonalSelection = Selector | tuple[Selector, ...] +Selection = BasicSelection | CoordinateSelection | MaskSelection | OrthogonalSelection +CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] +SelectionNormalized = tuple[Selector, ...] | ArrayOfIntOrBool +SelectionWithFields = Selection | str | Sequence[str] +SelectorTuple = tuple[Selector, ...] | npt.NDArray[np.intp] | slice +Fields = str | list[str] | tuple[str, ...] 
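To make the selection type aliases above concrete, a hedged set of example values (illustrative only, not part of the module):

    import numpy as np

    basic = (0, slice(2, 8), Ellipsis)       # BasicSelection: ints, slices, Ellipsis
    orthogonal = ([0, 2, 5], slice(None))    # OrthogonalSelection: independent per-dimension selectors
    coordinate = ([0, 1, 2], [3, 3, 3])      # CoordinateSelection: paired coordinate lists
    mask = np.zeros((4, 4), dtype=bool)      # MaskSelection: a single boolean array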
+ + +class ArrayIndexError(IndexError): + pass + + +class BoundsCheckError(IndexError): + _msg = "" + + def __init__(self, dim_len: int) -> None: + self._msg = f"index out of bounds for dimension with length {dim_len}" + + +class NegativeStepError(IndexError): + _msg = "only slices with step >= 1 are supported" + + +class VindexInvalidSelectionError(IndexError): + _msg = ( + "unsupported selection type for vectorized indexing; only " + "coordinate selection (tuple of integer arrays) and mask selection " + "(single Boolean array) are supported; got {0!r}" + ) + + +def err_too_many_indices(selection: Any, shape: ChunkCoords) -> None: + raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") + + +def _zarr_array_to_int_or_bool_array(arr: Array) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]: + if arr.dtype.kind in ("i", "b"): + return np.asarray(arr) + else: + raise IndexError( + f"Invalid array dtype: {arr.dtype}. Arrays used as indices must be of integer or boolean type" + ) + + +@runtime_checkable +class Indexer(Protocol): + shape: ChunkCoords + drop_axes: ChunkCoords + + def __iter__(self) -> Iterator[ChunkProjection]: ... + + +def ceildiv(a: float, b: float) -> int: + return math.ceil(a / b) + + +_ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] + + +def _iter_grid( + grid_shape: Sequence[int], + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, + order: _ArrayIndexingOrder = "lexicographic", +) -> Iterator[ChunkCoords]: + """ + Iterate over the elements of grid of integers, with the option to restrict the domain of + iteration to a contiguous subregion of that grid. + + Parameters + ---------- + grid_shape: Sequence[int] + The size of the domain to iterate over. + origin: Sequence[int] | None, default=None + The first coordinate of the domain to return. + selection_shape: Sequence[int] | None, default=None + The shape of the selection. + order: Literal["lexicographic"], default="lexicographic" + The linear indexing order to use. + + Returns + ------- + + itertools.product object + An iterator over tuples of integers + + Examples + -------- + >>> tuple(iter_grid((1,))) + ((0,),) + + >>> tuple(iter_grid((2,3))) + ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)) + + >>> tuple(iter_grid((2,3)), origin=(1,1)) + ((1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3)) + + >>> tuple(iter_grid((2,3)), origin=(1,1), selection_shape=(2,2)) + ((1, 1), (1, 2), (1, 3), (2, 1)) + """ + if origin is None: + origin_parsed = (0,) * len(grid_shape) + else: + if len(origin) != len(grid_shape): + msg = ( + "Shape and origin parameters must have the same length." + f"Got {len(grid_shape)} elements in shape, but {len(origin)} elements in origin." + ) + raise ValueError(msg) + origin_parsed = tuple(origin) + if selection_shape is None: + selection_shape_parsed = tuple( + g - o for o, g in zip(origin_parsed, grid_shape, strict=True) + ) + else: + selection_shape_parsed = tuple(selection_shape) + if order == "lexicographic": + dimensions: tuple[range, ...] = () + for idx, (o, gs, ss) in enumerate( + zip(origin_parsed, grid_shape, selection_shape_parsed, strict=True) + ): + if o + ss > gs: + raise IndexError( + f"Invalid selection shape ({selection_shape}) for origin ({origin}) and grid shape ({grid_shape}) at axis {idx}." + ) + dimensions += (range(o, o + ss),) + yield from itertools.product(*(dimensions)) + + else: + msg = f"Indexing order {order} is not supported at this time." 
# type: ignore[unreachable] + raise NotImplementedError(msg) + + +def is_integer(x: Any) -> TypeGuard[int]: + """True if x is an integer (both pure Python or NumPy).""" + return isinstance(x, numbers.Integral) and not is_bool(x) + + +def is_bool(x: Any) -> TypeGuard[bool | np.bool_]: + """True if x is a boolean (both pure Python or NumPy).""" + return type(x) in [bool, np.bool_] + + +def is_integer_list(x: Any) -> TypeGuard[list[int]]: + """True if x is a list of integers.""" + return isinstance(x, list) and len(x) > 0 and all(is_integer(i) for i in x) + + +def is_bool_list(x: Any) -> TypeGuard[list[bool | np.bool_]]: + """True if x is a list of boolean.""" + return isinstance(x, list) and len(x) > 0 and all(is_bool(i) for i in x) + + +def is_integer_array(x: Any, ndim: int | None = None) -> TypeGuard[npt.NDArray[np.intp]]: + t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" + if ndim is not None: + t = t and hasattr(x, "shape") and len(x.shape) == ndim + return t + + +def is_bool_array(x: Any, ndim: int | None = None) -> TypeGuard[npt.NDArray[np.bool_]]: + t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool + if ndim is not None: + t = t and hasattr(x, "shape") and len(x.shape) == ndim + return t + + +def is_int_or_bool_iterable(x: Any) -> bool: + return is_integer_list(x) or is_integer_array(x) or is_bool_array(x) or is_bool_list(x) + + +def is_scalar(value: Any, dtype: np.dtype[Any]) -> bool: + if np.isscalar(value): + return True + if hasattr(value, "shape") and value.shape == (): + return True + return isinstance(value, tuple) and dtype.names is not None and len(value) == len(dtype.names) + + +def is_pure_fancy_indexing(selection: Any, ndim: int) -> bool: + """Check whether a selection contains only scalars or integer/bool array-likes. + + Parameters + ---------- + selection : tuple, slice, or scalar + A valid selection value for indexing into arrays. + + Returns + ------- + is_pure : bool + True if the selection is a pure fancy indexing expression (ie not mixed + with boolean or slices). + """ + if is_bool_array(selection): + # is mask selection + return True + + if ndim == 1: + if is_integer_list(selection) or is_integer_array(selection) or is_bool_list(selection): + return True + + # if not, we go through the normal path below, because a 1-tuple + # of integers is also allowed. + no_slicing = ( + isinstance(selection, tuple) + and len(selection) == ndim + and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) + ) + return ( + no_slicing + and all( + is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) + for elem in selection + ) + and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) + ) + + +def is_pure_orthogonal_indexing(selection: Selection, ndim: int) -> TypeGuard[OrthogonalSelection]: + if not ndim: + return False + + selection_normalized = (selection,) if not isinstance(selection, tuple) else selection + + # Case 1: Selection contains of iterable of integers or boolean + if len(selection_normalized) == ndim and all( + is_int_or_bool_iterable(s) for s in selection_normalized + ): + return True + + # Case 2: selection contains either zero or one integer iterables. 
+ # All other selection elements are slices or integers + return ( + len(selection_normalized) <= ndim + and sum(is_int_or_bool_iterable(s) for s in selection_normalized) <= 1 + and all( + is_int_or_bool_iterable(s) or isinstance(s, int | slice) for s in selection_normalized + ) + ) + + +def get_chunk_shape(chunk_grid: ChunkGrid) -> ChunkCoords: + from zarr.core.chunk_grids import RegularChunkGrid + + assert isinstance( + chunk_grid, RegularChunkGrid + ), "Only regular chunk grid is supported, currently." + return chunk_grid.chunk_shape + + +def normalize_integer_selection(dim_sel: int, dim_len: int) -> int: + # normalize type to int + dim_sel = int(dim_sel) + + # handle wraparound + if dim_sel < 0: + dim_sel = dim_len + dim_sel + + # handle out of bounds + if dim_sel >= dim_len or dim_sel < 0: + raise BoundsCheckError(dim_len) + + return dim_sel + + +class ChunkDimProjection(NamedTuple): + """A mapping from chunk to output array for a single dimension. + + Parameters + ---------- + dim_chunk_ix + Index of chunk. + dim_chunk_sel + Selection of items from chunk array. + dim_out_sel + Selection of items in target (output) array. + + """ + + dim_chunk_ix: int + dim_chunk_sel: Selector + dim_out_sel: Selector | None + + +@dataclass(frozen=True) +class IntDimIndexer: + dim_sel: int + dim_len: int + dim_chunk_len: int + nitems: int = 1 + + def __init__(self, dim_sel: int, dim_len: int, dim_chunk_len: int) -> None: + object.__setattr__(self, "dim_sel", normalize_integer_selection(dim_sel, dim_len)) + object.__setattr__(self, "dim_len", dim_len) + object.__setattr__(self, "dim_chunk_len", dim_chunk_len) + + def __iter__(self) -> Iterator[ChunkDimProjection]: + dim_chunk_ix = self.dim_sel // self.dim_chunk_len + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel - dim_offset + dim_out_sel = None + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +@dataclass(frozen=True) +class SliceDimIndexer: + dim_len: int + dim_chunk_len: int + nitems: int + nchunks: int + + start: int + stop: int + step: int + + def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: + # normalize + start, stop, step = dim_sel.indices(dim_len) + if step < 1: + raise NegativeStepError + + object.__setattr__(self, "start", start) + object.__setattr__(self, "stop", stop) + object.__setattr__(self, "step", step) + + object.__setattr__(self, "dim_len", dim_len) + object.__setattr__(self, "dim_chunk_len", dim_chunk_len) + object.__setattr__(self, "nitems", max(0, ceildiv((stop - start), step))) + object.__setattr__(self, "nchunks", ceildiv(dim_len, dim_chunk_len)) + + def __iter__(self) -> Iterator[ChunkDimProjection]: + # figure out the range of chunks we need to visit + dim_chunk_ix_from = self.start // self.dim_chunk_len + dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) + + # iterate over chunks in range + for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): + # compute offsets for chunk within overall array + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) + + # determine chunk length, accounting for trailing chunk + dim_chunk_len = dim_limit - dim_offset + + if self.start < dim_offset: + # selection starts before current chunk + dim_chunk_sel_start = 0 + remainder = (dim_offset - self.start) % self.step + if remainder: + dim_chunk_sel_start += self.step - remainder + # compute number of previous items, provides offset into output array + dim_out_offset = 
ceildiv((dim_offset - self.start), self.step) + + else: + # selection starts within current chunk + dim_chunk_sel_start = self.start - dim_offset + dim_out_offset = 0 + + if self.stop > dim_limit: + # selection ends after current chunk + dim_chunk_sel_stop = dim_chunk_len + + else: + # selection ends within current chunk + dim_chunk_sel_stop = self.stop - dim_offset + + dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) + dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) + + # If there are no elements on the selection within this chunk, then skip + if dim_chunk_nitems == 0: + continue + + dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +def check_selection_length(selection: SelectionNormalized, shape: ChunkCoords) -> None: + if len(selection) > len(shape): + err_too_many_indices(selection, shape) + + +def replace_ellipsis(selection: Any, shape: ChunkCoords) -> SelectionNormalized: + selection = ensure_tuple(selection) + + # count number of ellipsis present + n_ellipsis = sum(1 for i in selection if i is Ellipsis) + + if n_ellipsis > 1: + # more than 1 is an error + raise IndexError("an index can only have a single ellipsis ('...')") + + elif n_ellipsis == 1: + # locate the ellipsis, count how many items to left and right + n_items_l = selection.index(Ellipsis) # items to left of ellipsis + n_items_r = len(selection) - (n_items_l + 1) # items to right of ellipsis + n_items = len(selection) - 1 # all non-ellipsis items + + if n_items >= len(shape): + # ellipsis does nothing, just remove it + selection = tuple(i for i in selection if i != Ellipsis) + + else: + # replace ellipsis with as many slices are needed for number of dims + new_item = selection[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) + if n_items_r: + new_item += selection[-n_items_r:] + selection = new_item + + # fill out selection if not completely specified + if len(selection) < len(shape): + selection += (slice(None),) * (len(shape) - len(selection)) + + # check selection not too long + check_selection_length(selection, shape) + + return cast(SelectionNormalized, selection) + + +def replace_lists(selection: SelectionNormalized) -> SelectionNormalized: + return tuple( + np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection + ) + + +T = TypeVar("T") + + +def ensure_tuple(v: Any) -> SelectionNormalized: + if not isinstance(v, tuple): + v = (v,) + return cast(SelectionNormalized, v) + + +class ChunkProjection(NamedTuple): + """A mapping of items from chunk to output array. Can be used to extract items from the + chunk array for loading into an output array. Can also be used to extract items from a + value array for setting/updating in a chunk array. + + Parameters + ---------- + chunk_coords + Indices of chunk. + chunk_selection + Selection of items from chunk array. + out_selection + Selection of items in target (output) array. + + """ + + chunk_coords: ChunkCoords + chunk_selection: tuple[Selector, ...] | npt.NDArray[np.intp] + out_selection: tuple[Selector, ...] 
| npt.NDArray[np.intp] | slice + + +def is_slice(s: Any) -> TypeGuard[slice]: + return isinstance(s, slice) + + +def is_contiguous_slice(s: Any) -> TypeGuard[slice]: + return is_slice(s) and (s.step is None or s.step == 1) + + +def is_positive_slice(s: Any) -> TypeGuard[slice]: + return is_slice(s) and (s.step is None or s.step >= 1) + + +def is_contiguous_selection(selection: Any) -> TypeGuard[slice]: + selection = ensure_tuple(selection) + return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) + + +def is_basic_selection(selection: Any) -> TypeGuard[BasicSelection]: + selection = ensure_tuple(selection) + return all(is_integer(s) or is_positive_slice(s) for s in selection) + + +@dataclass(frozen=True) +class BasicIndexer(Indexer): + dim_indexers: list[IntDimIndexer | SliceDimIndexer] + shape: ChunkCoords + drop_axes: ChunkCoords + + def __init__( + self, + selection: BasicSelection, + shape: ChunkCoords, + chunk_grid: ChunkGrid, + ) -> None: + chunk_shape = get_chunk_shape(chunk_grid) + # handle ellipsis + selection_normalized = replace_ellipsis(selection, shape) + + # setup per-dimension indexers + dim_indexers: list[IntDimIndexer | SliceDimIndexer] = [] + for dim_sel, dim_len, dim_chunk_len in zip( + selection_normalized, shape, chunk_shape, strict=True + ): + dim_indexer: IntDimIndexer | SliceDimIndexer + if is_integer(dim_sel): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_slice(dim_sel): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + + else: + raise IndexError( + "unsupported selection item for basic indexing; " + f"expected integer or slice, got {type(dim_sel)!r}" + ) + + dim_indexers.append(dim_indexer) + + object.__setattr__(self, "dim_indexers", dim_indexers) + object.__setattr__( + self, + "shape", + tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)), + ) + object.__setattr__(self, "drop_axes", ()) + + def __iter__(self) -> Iterator[ChunkProjection]: + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +@dataclass(frozen=True) +class BoolArrayDimIndexer: + dim_sel: npt.NDArray[np.bool_] + dim_len: int + dim_chunk_len: int + nchunks: int + + chunk_nitems: npt.NDArray[Any] + chunk_nitems_cumsum: npt.NDArray[Any] + nitems: int + dim_chunk_ixs: npt.NDArray[np.intp] + + def __init__(self, dim_sel: npt.NDArray[np.bool_], dim_len: int, dim_chunk_len: int) -> None: + # check number of dimensions + if not is_bool_array(dim_sel, 1): + raise IndexError("Boolean arrays in an orthogonal selection must be 1-dimensional only") + + # check shape + if dim_sel.shape[0] != dim_len: + raise IndexError( + f"Boolean array has the wrong length for dimension; expected {dim_len}, got {dim_sel.shape[0]}" + ) + + # precompute number of selected items for each chunk + nchunks = ceildiv(dim_len, dim_chunk_len) + chunk_nitems = np.zeros(nchunks, dtype="i8") + for dim_chunk_ix in range(nchunks): + dim_offset = dim_chunk_ix * dim_chunk_len + chunk_nitems[dim_chunk_ix] = np.count_nonzero( + dim_sel[dim_offset : dim_offset + dim_chunk_len] + ) + chunk_nitems_cumsum = np.cumsum(chunk_nitems) + nitems = chunk_nitems_cumsum[-1] + dim_chunk_ixs = np.nonzero(chunk_nitems)[0] + + # store 
attributes + object.__setattr__(self, "dim_sel", dim_sel) + object.__setattr__(self, "dim_len", dim_len) + object.__setattr__(self, "dim_chunk_len", dim_chunk_len) + object.__setattr__(self, "nchunks", nchunks) + object.__setattr__(self, "chunk_nitems", chunk_nitems) + object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) + object.__setattr__(self, "nitems", nitems) + object.__setattr__(self, "dim_chunk_ixs", dim_chunk_ixs) + + def __iter__(self) -> Iterator[ChunkDimProjection]: + # iterate over chunks with at least one item + for dim_chunk_ix in self.dim_chunk_ixs: + # find region in chunk + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] + + # pad out if final chunk + if dim_chunk_sel.shape[0] < self.dim_chunk_len: + tmp = np.zeros(self.dim_chunk_len, dtype=bool) + tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel + dim_chunk_sel = tmp + + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + dim_out_sel = slice(start, stop) + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +class Order(Enum): + """ + Enum for indexing order. + """ + + UNKNOWN = 0 + INCREASING = 1 + DECREASING = 2 + UNORDERED = 3 + + @staticmethod + def check(a: npt.NDArray[Any]) -> Order: + diff = np.diff(a) + diff_positive = diff >= 0 + n_diff_positive = np.count_nonzero(diff_positive) + all_increasing = n_diff_positive == len(diff_positive) + any_increasing = n_diff_positive > 0 + if all_increasing: + order = Order.INCREASING + elif any_increasing: + order = Order.UNORDERED + else: + order = Order.DECREASING + return order + + +def wraparound_indices(x: npt.NDArray[Any], dim_len: int) -> None: + loc_neg = x < 0 + if np.any(loc_neg): + x[loc_neg] = x[loc_neg] + dim_len + + +def boundscheck_indices(x: npt.NDArray[Any], dim_len: int) -> None: + if np.any(x < 0) or np.any(x >= dim_len): + raise BoundsCheckError(dim_len) + + +@dataclass(frozen=True) +class IntArrayDimIndexer: + """Integer array selection against a single dimension.""" + + dim_len: int + dim_chunk_len: int + nchunks: int + nitems: int + order: Order + dim_sel: npt.NDArray[np.intp] + dim_out_sel: npt.NDArray[np.intp] + chunk_nitems: int + dim_chunk_ixs: npt.NDArray[np.intp] + chunk_nitems_cumsum: npt.NDArray[np.intp] + + def __init__( + self, + dim_sel: npt.NDArray[np.intp], + dim_len: int, + dim_chunk_len: int, + wraparound: bool = True, + boundscheck: bool = True, + order: Order = Order.UNKNOWN, + ) -> None: + # ensure 1d array + dim_sel = np.asanyarray(dim_sel) + if not is_integer_array(dim_sel, 1): + raise IndexError("integer arrays in an orthogonal selection must be 1-dimensional only") + + nitems = len(dim_sel) + nchunks = ceildiv(dim_len, dim_chunk_len) + + # handle wraparound + if wraparound: + wraparound_indices(dim_sel, dim_len) + + # handle out of bounds + if boundscheck: + boundscheck_indices(dim_sel, dim_len) + + # determine which chunk is needed for each selection item + # note: for dense integer selections, the division operation here is the + # bottleneck + dim_sel_chunk = dim_sel // dim_chunk_len + + # determine order of indices + if order == Order.UNKNOWN: + order = Order.check(dim_sel) + order = Order(order) + + if order == Order.INCREASING: + dim_out_sel = None + elif order == Order.DECREASING: + dim_sel = dim_sel[::-1] + # TODO should be possible to do this without creating an arange + dim_out_sel = 
np.arange(nitems - 1, -1, -1) + else: + # sort indices to group by chunk + dim_out_sel = np.argsort(dim_sel_chunk) + dim_sel = np.take(dim_sel, dim_out_sel) + + # precompute number of selected items for each chunk + chunk_nitems = np.bincount(dim_sel_chunk, minlength=nchunks) + + # find chunks that we need to visit + dim_chunk_ixs = np.nonzero(chunk_nitems)[0] + + # compute offsets into the output array + chunk_nitems_cumsum = np.cumsum(chunk_nitems) + + # store attributes + object.__setattr__(self, "dim_len", dim_len) + object.__setattr__(self, "dim_chunk_len", dim_chunk_len) + object.__setattr__(self, "nchunks", nchunks) + object.__setattr__(self, "nitems", nitems) + object.__setattr__(self, "order", order) + object.__setattr__(self, "dim_sel", dim_sel) + object.__setattr__(self, "dim_out_sel", dim_out_sel) + object.__setattr__(self, "chunk_nitems", chunk_nitems) + object.__setattr__(self, "dim_chunk_ixs", dim_chunk_ixs) + object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) + + def __iter__(self) -> Iterator[ChunkDimProjection]: + for dim_chunk_ix in self.dim_chunk_ixs: + dim_out_sel: slice | npt.NDArray[np.intp] + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + if self.order == Order.INCREASING: + dim_out_sel = slice(start, stop) + else: + dim_out_sel = self.dim_out_sel[start:stop] + + # find region in chunk + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel[start:stop] - dim_offset + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +def slice_to_range(s: slice, length: int) -> range: + return range(*s.indices(length)) + + +def ix_(selection: Any, shape: ChunkCoords) -> npt.NDArray[np.intp]: + """Convert an orthogonal selection to a numpy advanced (fancy) selection, like ``numpy.ix_`` + but with support for slices and single ints.""" + + # normalisation + selection = replace_ellipsis(selection, shape) + + # replace slice and int as these are not supported by numpy.ix_ + selection = [ + slice_to_range(dim_sel, dim_len) + if isinstance(dim_sel, slice) + else [dim_sel] + if is_integer(dim_sel) + else dim_sel + for dim_sel, dim_len in zip(selection, shape, strict=True) + ] + + # now get numpy to convert to a coordinate selection + selection = np.ix_(*selection) + + return cast(npt.NDArray[np.intp], selection) + + +def oindex(a: npt.NDArray[Any], selection: Selection) -> npt.NDArray[Any]: + """Implementation of orthogonal indexing with slices and ints.""" + selection = replace_ellipsis(selection, a.shape) + drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) + selection = ix_(selection, a.shape) + result = a[selection] + if drop_axes: + result = result.squeeze(axis=drop_axes) + return result + + +def oindex_set(a: npt.NDArray[Any], selection: Selection, value: Any) -> None: + selection = replace_ellipsis(selection, a.shape) + drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) + selection = ix_(selection, a.shape) + if not np.isscalar(value) and drop_axes: + value = np.asanyarray(value) + value_selection: list[Selector | None] = [slice(None)] * len(a.shape) + for i in drop_axes: + value_selection[i] = np.newaxis + value = value[tuple(value_selection)] + a[selection] = value + + +@dataclass(frozen=True) +class OrthogonalIndexer(Indexer): + dim_indexers: list[IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer] + shape: ChunkCoords + 
chunk_shape: ChunkCoords + is_advanced: bool + drop_axes: tuple[int, ...] + + def __init__(self, selection: Selection, shape: ChunkCoords, chunk_grid: ChunkGrid) -> None: + chunk_shape = get_chunk_shape(chunk_grid) + + # handle ellipsis + selection = replace_ellipsis(selection, shape) + + # normalize list to array + selection = replace_lists(selection) + + # setup per-dimension indexers + dim_indexers: list[ + IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer + ] = [] + for dim_sel, dim_len, dim_chunk_len in zip(selection, shape, chunk_shape, strict=True): + dim_indexer: IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer + if is_integer(dim_sel): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif isinstance(dim_sel, slice): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_integer_array(dim_sel): + dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_bool_array(dim_sel): + dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + + else: + raise IndexError( + "unsupported selection item for orthogonal indexing; " + "expected integer, slice, integer array or Boolean " + f"array, got {type(dim_sel)!r}" + ) + + dim_indexers.append(dim_indexer) + + shape = tuple(s.nitems for s in dim_indexers if not isinstance(s, IntDimIndexer)) + is_advanced = not is_basic_selection(selection) + if is_advanced: + drop_axes = tuple( + i + for i, dim_indexer in enumerate(dim_indexers) + if isinstance(dim_indexer, IntDimIndexer) + ) + else: + drop_axes = () + + object.__setattr__(self, "dim_indexers", dim_indexers) + object.__setattr__(self, "shape", shape) + object.__setattr__(self, "chunk_shape", chunk_shape) + object.__setattr__(self, "is_advanced", is_advanced) + object.__setattr__(self, "drop_axes", drop_axes) + + def __iter__(self) -> Iterator[ChunkProjection]: + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection: tuple[Selector, ...] | npt.NDArray[Any] = tuple( + p.dim_chunk_sel for p in dim_projections + ) + out_selection: tuple[Selector, ...] | npt.NDArray[Any] = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + # handle advanced indexing arrays orthogonally + if self.is_advanced: + # N.B., numpy doesn't support orthogonal indexing directly as yet, + # so need to work around via np.ix_. Also np.ix_ does not support a + # mixture of arrays and slices or integers, so need to convert slices + # and integers into ranges. + chunk_selection = ix_(chunk_selection, self.chunk_shape) + + # special case for non-monotonic indices + if not is_basic_selection(out_selection): + out_selection = ix_(out_selection, self.shape) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +@dataclass(frozen=True) +class OIndex: + array: Array + + # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool + def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLike: + from zarr.core.array import Array + + # if input is a Zarr array, we materialize it now. 
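+            # Materializing (via _zarr_array_to_int_or_bool_array) loads the full index array into memory before the selection is applied.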
+ if isinstance(selection, Array): + selection = _zarr_array_to_int_or_bool_array(selection) + + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + return self.array.get_orthogonal_selection( + cast(OrthogonalSelection, new_selection), fields=fields + ) + + def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> None: + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + return self.array.set_orthogonal_selection( + cast(OrthogonalSelection, new_selection), value, fields=fields + ) + + +@dataclass(frozen=True) +class BlockIndexer(Indexer): + dim_indexers: list[SliceDimIndexer] + shape: ChunkCoords + drop_axes: ChunkCoords + + def __init__( + self, selection: BasicSelection, shape: ChunkCoords, chunk_grid: ChunkGrid + ) -> None: + chunk_shape = get_chunk_shape(chunk_grid) + + # handle ellipsis + selection_normalized = replace_ellipsis(selection, shape) + + # normalize list to array + selection_normalized = replace_lists(selection_normalized) + + # setup per-dimension indexers + dim_indexers = [] + for dim_sel, dim_len, dim_chunk_size in zip( + selection_normalized, shape, chunk_shape, strict=True + ): + dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) + + if is_integer(dim_sel): + if dim_sel < 0: + dim_sel = dim_numchunks + dim_sel + + start = dim_sel * dim_chunk_size + stop = start + dim_chunk_size + slice_ = slice(start, stop) + + elif is_slice(dim_sel): + start = dim_sel.start if dim_sel.start is not None else 0 + stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks + + if dim_sel.step not in {1, None}: + raise IndexError( + "unsupported selection item for block indexing; " + f"expected integer or slice with step=1, got {type(dim_sel)!r}" + ) + + # Can't reuse wraparound_indices because it expects a numpy array + # We have integers here. 
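+                # Negative chunk indices count back from the last chunk, mirroring negative element indices.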
+ if start < 0: + start = dim_numchunks + start + if stop < 0: + stop = dim_numchunks + stop + + start = start * dim_chunk_size + stop = stop * dim_chunk_size + slice_ = slice(start, stop) + + else: + raise IndexError( + "unsupported selection item for block indexing; " + f"expected integer or slice, got {type(dim_sel)!r}" + ) + + dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) + dim_indexers.append(dim_indexer) + + if start >= dim_len or start < 0: + raise BoundsCheckError(dim_len) + + shape = tuple(s.nitems for s in dim_indexers) + + object.__setattr__(self, "dim_indexers", dim_indexers) + object.__setattr__(self, "shape", shape) + object.__setattr__(self, "drop_axes", ()) + + def __iter__(self) -> Iterator[ChunkProjection]: + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +@dataclass(frozen=True) +class BlockIndex: + array: Array + + def __getitem__(self, selection: BasicSelection) -> NDArrayLike: + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + return self.array.get_block_selection(cast(BasicSelection, new_selection), fields=fields) + + def __setitem__(self, selection: BasicSelection, value: npt.ArrayLike) -> None: + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + return self.array.set_block_selection( + cast(BasicSelection, new_selection), value, fields=fields + ) + + +def is_coordinate_selection( + selection: SelectionNormalized, shape: ChunkCoords +) -> TypeGuard[CoordinateSelectionNormalized]: + return ( + isinstance(selection, tuple) + and len(selection) == len(shape) + and all(is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection) + ) + + +def is_mask_selection(selection: Selection, shape: ChunkCoords) -> TypeGuard[MaskSelection]: + return ( + isinstance(selection, tuple) + and len(selection) == 1 + and is_bool_array(selection[0]) + and selection[0].shape == shape + ) + + +@dataclass(frozen=True) +class CoordinateIndexer(Indexer): + sel_shape: ChunkCoords + selection: CoordinateSelectionNormalized + sel_sort: npt.NDArray[np.intp] | None + chunk_nitems_cumsum: npt.NDArray[np.intp] + chunk_rixs: npt.NDArray[np.intp] + chunk_mixs: tuple[npt.NDArray[np.intp], ...] 
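+    # chunk_rixs / chunk_mixs are the raveled and unraveled indices of the chunks containing at least one selected point.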
+ shape: ChunkCoords + chunk_shape: ChunkCoords + drop_axes: ChunkCoords + + def __init__( + self, selection: CoordinateSelection, shape: ChunkCoords, chunk_grid: ChunkGrid + ) -> None: + chunk_shape = get_chunk_shape(chunk_grid) + + cdata_shape: ChunkCoords + if shape == (): + cdata_shape = (1,) + else: + cdata_shape = tuple(math.ceil(s / c) for s, c in zip(shape, chunk_shape, strict=True)) + nchunks = reduce(operator.mul, cdata_shape, 1) + + # some initial normalization + selection_normalized = cast(CoordinateSelectionNormalized, ensure_tuple(selection)) + selection_normalized = tuple( + np.asarray([i]) if is_integer(i) else i for i in selection_normalized + ) + selection_normalized = cast( + CoordinateSelectionNormalized, replace_lists(selection_normalized) + ) + + # validation + if not is_coordinate_selection(selection_normalized, shape): + raise IndexError( + "invalid coordinate selection; expected one integer " + "(coordinate) array per dimension of the target array, " + f"got {selection!r}" + ) + + # handle wraparound, boundscheck + for dim_sel, dim_len in zip(selection_normalized, shape, strict=True): + # handle wraparound + wraparound_indices(dim_sel, dim_len) + + # handle out of bounds + boundscheck_indices(dim_sel, dim_len) + + # compute chunk index for each point in the selection + chunks_multi_index = tuple( + dim_sel // dim_chunk_len + for (dim_sel, dim_chunk_len) in zip(selection_normalized, chunk_shape, strict=True) + ) + + # broadcast selection - this will raise error if array dimensions don't match + selection_broadcast = tuple(np.broadcast_arrays(*selection_normalized)) + chunks_multi_index_broadcast = np.broadcast_arrays(*chunks_multi_index) + + # remember shape of selection, because we will flatten indices for processing + sel_shape = selection_broadcast[0].shape if selection_broadcast[0].shape else (1,) + + # flatten selection + selection_broadcast = tuple(dim_sel.reshape(-1) for dim_sel in selection_broadcast) + chunks_multi_index_broadcast = tuple( + dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index_broadcast + ) + + # ravel chunk indices + chunks_raveled_indices = np.ravel_multi_index( + chunks_multi_index_broadcast, dims=cdata_shape + ) + + # group points by chunk + if np.any(np.diff(chunks_raveled_indices) < 0): + # optimisation, only sort if needed + sel_sort = np.argsort(chunks_raveled_indices) + selection_broadcast = tuple(dim_sel[sel_sort] for dim_sel in selection_broadcast) + else: + sel_sort = None + + shape = selection_broadcast[0].shape if selection_broadcast[0].shape else (1,) + + # precompute number of selected items for each chunk + chunk_nitems = np.bincount(chunks_raveled_indices, minlength=nchunks) + chunk_nitems_cumsum = np.cumsum(chunk_nitems) + # locate the chunks we need to process + chunk_rixs = np.nonzero(chunk_nitems)[0] + + # unravel chunk indices + chunk_mixs = np.unravel_index(chunk_rixs, cdata_shape) + + object.__setattr__(self, "sel_shape", sel_shape) + object.__setattr__(self, "selection", selection_broadcast) + object.__setattr__(self, "sel_sort", sel_sort) + object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) + object.__setattr__(self, "chunk_rixs", chunk_rixs) + object.__setattr__(self, "chunk_mixs", chunk_mixs) + object.__setattr__(self, "chunk_shape", chunk_shape) + object.__setattr__(self, "shape", shape) + object.__setattr__(self, "drop_axes", ()) + + def __iter__(self) -> Iterator[ChunkProjection]: + # iterate over chunks + for i, chunk_rix in enumerate(self.chunk_rixs): + chunk_coords = tuple(m[i] for 
m in self.chunk_mixs) + if chunk_rix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[chunk_rix - 1] + stop = self.chunk_nitems_cumsum[chunk_rix] + out_selection: slice | npt.NDArray[np.intp] + if self.sel_sort is None: + out_selection = slice(start, stop) + else: + out_selection = self.sel_sort[start:stop] + + chunk_offsets = tuple( + dim_chunk_ix * dim_chunk_len + for dim_chunk_ix, dim_chunk_len in zip(chunk_coords, self.chunk_shape, strict=True) + ) + chunk_selection = tuple( + dim_sel[start:stop] - dim_chunk_offset + for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets, strict=True) + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +@dataclass(frozen=True) +class MaskIndexer(CoordinateIndexer): + def __init__(self, selection: MaskSelection, shape: ChunkCoords, chunk_grid: ChunkGrid) -> None: + # some initial normalization + selection_normalized = cast(tuple[MaskSelection], ensure_tuple(selection)) + selection_normalized = cast(tuple[MaskSelection], replace_lists(selection_normalized)) + + # validation + if not is_mask_selection(selection_normalized, shape): + raise IndexError( + "invalid mask selection; expected one Boolean (mask)" + f"array with the same shape as the target array, got {selection_normalized!r}" + ) + + # convert to indices + selection_indices = np.nonzero(selection_normalized[0]) + + # delegate the rest to superclass + super().__init__(selection_indices, shape, chunk_grid) + + +@dataclass(frozen=True) +class VIndex: + array: Array + + # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool + def __getitem__(self, selection: CoordinateSelection | MaskSelection | Array) -> NDArrayLike: + from zarr.core.array import Array + + # if input is a Zarr array, we materialize it now. 
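+        # A tuple of integer arrays dispatches to coordinate selection below; a single boolean array dispatches to mask selection.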
+ if isinstance(selection, Array): + selection = _zarr_array_to_int_or_bool_array(selection) + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + if is_coordinate_selection(new_selection, self.array.shape): + return self.array.get_coordinate_selection(new_selection, fields=fields) + elif is_mask_selection(new_selection, self.array.shape): + return self.array.get_mask_selection(new_selection, fields=fields) + else: + raise VindexInvalidSelectionError(new_selection) + + def __setitem__( + self, selection: CoordinateSelection | MaskSelection, value: npt.ArrayLike + ) -> None: + fields, new_selection = pop_fields(selection) + new_selection = ensure_tuple(new_selection) + new_selection = replace_lists(new_selection) + if is_coordinate_selection(new_selection, self.array.shape): + self.array.set_coordinate_selection(new_selection, value, fields=fields) + elif is_mask_selection(new_selection, self.array.shape): + self.array.set_mask_selection(new_selection, value, fields=fields) + else: + raise VindexInvalidSelectionError(new_selection) + + +def check_fields(fields: Fields | None, dtype: np.dtype[Any]) -> np.dtype[Any]: + # early out + if fields is None: + return dtype + # check type + if not isinstance(fields, str | list | tuple): + raise IndexError( + f"'fields' argument must be a string or list of strings; found {type(fields)!r}" + ) + if fields: + if dtype.names is None: + raise IndexError("invalid 'fields' argument, array does not have any fields") + try: + if isinstance(fields, str): + # single field selection + out_dtype = dtype[fields] + else: + # multiple field selection + out_dtype = np.dtype([(f, dtype[f]) for f in fields]) + except KeyError as e: + raise IndexError(f"invalid 'fields' argument, field not found: {e!r}") from e + else: + return out_dtype + else: + return dtype + + +def check_no_multi_fields(fields: Fields | None) -> Fields | None: + if isinstance(fields, list): + if len(fields) == 1: + return fields[0] + elif len(fields) > 1: + raise IndexError("multiple fields are not supported for this operation") + return fields + + +def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection]: + if isinstance(selection, str): + # single field selection + return selection, () + elif not isinstance(selection, tuple): + # single selection item, no fields + # leave selection as-is + return None, cast(Selection, selection) + else: + # multiple items, split fields from selection items + fields: Fields = [f for f in selection if isinstance(f, str)] + fields = fields[0] if len(fields) == 1 else fields + selection_tuple = tuple(s for s in selection if not isinstance(s, str)) + selection = cast( + Selection, selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple + ) + return fields, selection + + +def make_slice_selection(selection: Any) -> list[slice]: + ls: list[slice] = [] + for dim_selection in selection: + if is_integer(dim_selection): + ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) + elif isinstance(dim_selection, np.ndarray): + if len(dim_selection) == 1: + ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) + else: + raise ArrayIndexError + else: + ls.append(dim_selection) + return ls + + +def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: + # Inspired by compressed morton code as implemented in Neuroglancer + # 
https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code + bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) + max_coords_bits = max(bits) + input_bit = 0 + input_value = z + out = [0] * len(chunk_shape) + + for coord_bit in range(max_coords_bits): + for dim in range(len(chunk_shape)): + if coord_bit < bits[dim]: + bit = (input_value >> input_bit) & 1 + out[dim] |= bit << coord_bit + input_bit += 1 + return tuple(out) + + +def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: + for i in range(product(chunk_shape)): + yield decode_morton(i, chunk_shape) + + +def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]: + return itertools.product(*(range(x) for x in chunks_per_shard)) + + +def is_total_slice(item: Selection, shape: ChunkCoords) -> bool: + """Determine whether `item` specifies a complete slice of array with the + given `shape`. Used to optimize __setitem__ operations on the Chunk + class.""" + + # N.B., assume shape is normalized + if item == slice(None): + return True + if isinstance(item, slice): + item = (item,) + if isinstance(item, tuple): + return all( + isinstance(dim_sel, slice) + and ( + (dim_sel == slice(None)) + or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None])) + ) + for dim_sel, dim_len in zip(item, shape, strict=False) + ) + else: + raise TypeError(f"expected slice or tuple of slices, found {item!r}") + + +def get_indexer( + selection: SelectionWithFields, shape: ChunkCoords, chunk_grid: ChunkGrid +) -> Indexer: + _, pure_selection = pop_fields(selection) + if is_pure_fancy_indexing(pure_selection, len(shape)): + new_selection = ensure_tuple(selection) + new_selection = replace_lists(new_selection) + if is_coordinate_selection(new_selection, shape): + return CoordinateIndexer(cast(CoordinateSelection, selection), shape, chunk_grid) + elif is_mask_selection(new_selection, shape): + return MaskIndexer(cast(MaskSelection, selection), shape, chunk_grid) + else: + raise VindexInvalidSelectionError(new_selection) + elif is_pure_orthogonal_indexing(pure_selection, len(shape)): + return OrthogonalIndexer(cast(OrthogonalSelection, selection), shape, chunk_grid) + else: + return BasicIndexer(cast(BasicSelection, selection), shape, chunk_grid) diff --git a/src/zarr/core/metadata/__init__.py b/src/zarr/core/metadata/__init__.py new file mode 100644 index 0000000000..f4374d9aba --- /dev/null +++ b/src/zarr/core/metadata/__init__.py @@ -0,0 +1,17 @@ +from typing import TypeAlias, TypeVar + +from .v2 import ArrayV2Metadata, ArrayV2MetadataDict +from .v3 import ArrayV3Metadata, ArrayV3MetadataDict + +ArrayMetadata: TypeAlias = ArrayV2Metadata | ArrayV3Metadata +ArrayMetadataDict: TypeAlias = ArrayV2MetadataDict | ArrayV3MetadataDict +T_ArrayMetadata = TypeVar("T_ArrayMetadata", ArrayV2Metadata, ArrayV3Metadata) + +__all__ = [ + "ArrayV2Metadata", + "ArrayV3Metadata", + "ArrayMetadata", + "ArrayMetadataDict", + "ArrayV3MetadataDict", + "ArrayV2MetadataDict", +] diff --git a/src/zarr/core/metadata/common.py b/src/zarr/core/metadata/common.py new file mode 100644 index 0000000000..3adb65cf02 --- /dev/null +++ b/src/zarr/core/metadata/common.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from zarr.core.common import JSON + + +def parse_attributes(data: None | dict[str, JSON]) -> dict[str, JSON]: + if data is None: + return {} + + return data diff --git 
a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py new file mode 100644 index 0000000000..c5f34d2776 --- /dev/null +++ b/src/zarr/core/metadata/v2.py @@ -0,0 +1,327 @@ +from __future__ import annotations + +import base64 +from collections.abc import Iterable +from enum import Enum +from functools import cached_property +from typing import TYPE_CHECKING, TypedDict, cast + +from zarr.abc.metadata import Metadata + +if TYPE_CHECKING: + from typing import Any, Literal, Self + + import numpy.typing as npt + + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import JSON, ChunkCoords + +import json +from dataclasses import dataclass, field, fields, replace + +import numcodecs +import numpy as np + +from zarr.core.array_spec import ArraySpec +from zarr.core.chunk_grids import RegularChunkGrid +from zarr.core.chunk_key_encodings import parse_separator +from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike +from zarr.core.config import config, parse_indexing_order +from zarr.core.metadata.common import parse_attributes + + +class ArrayV2MetadataDict(TypedDict): + """ + A typed dictionary model for zarr v2 metadata. + """ + + zarr_format: Literal[2] + attributes: dict[str, JSON] + + +@dataclass(frozen=True, kw_only=True) +class ArrayV2Metadata(Metadata): + shape: ChunkCoords + chunks: tuple[int, ...] + dtype: np.dtype[Any] + fill_value: None | int | float | str | bytes = 0 + order: Literal["C", "F"] = "C" + filters: tuple[numcodecs.abc.Codec, ...] | None = None + dimension_separator: Literal[".", "/"] = "." + compressor: numcodecs.abc.Codec | None = None + attributes: dict[str, JSON] = field(default_factory=dict) + zarr_format: Literal[2] = field(init=False, default=2) + + def __init__( + self, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike, + chunks: ChunkCoords, + fill_value: Any, + order: Literal["C", "F"], + dimension_separator: Literal[".", "/"] = ".", + compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, + filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, + attributes: dict[str, JSON] | None = None, + ) -> None: + """ + Metadata for a Zarr version 2 array. 
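+
+        Parameters
+        ----------
+        shape : ChunkCoords
+            The shape of the array.
+        dtype : npt.DTypeLike
+            The data type of the array.
+        chunks : ChunkCoords
+            The shape of the chunks of the array.
+        fill_value : Any
+            The value used for uninitialized portions of the array.
+        order : Literal["C", "F"]
+            The memory order of chunks.
+        dimension_separator : Literal[".", "/"], default "."
+            The separator used in chunk keys.
+        compressor : numcodecs.abc.Codec | dict[str, JSON] | None
+            The compressor applied to chunk bytes.
+        filters : Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None
+            Filters applied to chunk data before compression.
+        attributes : dict[str, JSON] | None
+            User attributes stored with the array metadata.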
+ """ + shape_parsed = parse_shapelike(shape) + dtype_parsed = parse_dtype(dtype) + chunks_parsed = parse_shapelike(chunks) + compressor_parsed = parse_compressor(compressor) + order_parsed = parse_indexing_order(order) + dimension_separator_parsed = parse_separator(dimension_separator) + filters_parsed = parse_filters(filters) + fill_value_parsed = parse_fill_value(fill_value, dtype=dtype_parsed) + attributes_parsed = parse_attributes(attributes) + + object.__setattr__(self, "shape", shape_parsed) + object.__setattr__(self, "dtype", dtype_parsed) + object.__setattr__(self, "chunks", chunks_parsed) + object.__setattr__(self, "compressor", compressor_parsed) + object.__setattr__(self, "order", order_parsed) + object.__setattr__(self, "dimension_separator", dimension_separator_parsed) + object.__setattr__(self, "filters", filters_parsed) + object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "attributes", attributes_parsed) + + # ensure that the metadata document is consistent + _ = parse_metadata(self) + + @property + def ndim(self) -> int: + return len(self.shape) + + @cached_property + def chunk_grid(self) -> RegularChunkGrid: + return RegularChunkGrid(chunk_shape=self.chunks) + + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: + def _json_convert( + o: Any, + ) -> Any: + if isinstance(o, np.dtype): + if o.fields is None: + return o.str + else: + return o.descr + if isinstance(o, numcodecs.abc.Codec): + return o.get_config() + if np.isscalar(o): + out: Any + if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"): + # https://github.com/zarr-developers/zarr-python/issues/2119 + # `.item()` on a datetime type might or might not return an + # integer, depending on the value. + # Explicitly cast to an int first, and then grab .item() + out = o.view("i8").item() + else: + # convert numpy scalar to python type, and pass + # python types through + out = getattr(o, "item", lambda: o)() + if isinstance(out, complex): + # python complex types are not JSON serializable, so we use the + # serialization defined in the zarr v3 spec + return [out.real, out.imag] + return out + if isinstance(o, Enum): + return o.name + raise TypeError + + zarray_dict = self.to_dict() + zattrs_dict = zarray_dict.pop("attributes", {}) + json_indent = config.get("json_indent") + return { + ZARRAY_JSON: prototype.buffer.from_bytes( + json.dumps(zarray_dict, default=_json_convert, indent=json_indent).encode() + ), + ZATTRS_JSON: prototype.buffer.from_bytes( + json.dumps(zattrs_dict, indent=json_indent).encode() + ), + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: + # make a copy to protect the original from modification + _data = data.copy() + # check that the zarr_format attribute is correct + _ = parse_zarr_format(_data.pop("zarr_format")) + dtype = parse_dtype(_data["dtype"]) + + if dtype.kind in "SV": + fill_value_encoded = _data.get("fill_value") + if fill_value_encoded is not None: + fill_value = base64.standard_b64decode(fill_value_encoded) + _data["fill_value"] = fill_value + + # zarr v2 allowed arbitrary keys here. + # We don't want the ArrayV2Metadata constructor to fail just because someone put an + # extra key in the metadata. 
+ expected = {x.name for x in fields(cls)} + # https://github.com/zarr-developers/zarr-python/issues/2269 + # handle the renames + expected |= {"dtype", "chunks"} + + _data = {k: v for k, v in _data.items() if k in expected} + + return cls(**_data) + + def to_dict(self) -> dict[str, JSON]: + zarray_dict = super().to_dict() + + if self.dtype.kind in "SV" and self.fill_value is not None: + # There's a relationship between self.dtype and self.fill_value + # that mypy isn't aware of. The fact that we have S or V dtype here + # means we should have a bytes-type fill_value. + fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii") + zarray_dict["fill_value"] = fill_value + + _ = zarray_dict.pop("dtype") + zarray_dict["dtype"] = self.dtype.str + + return zarray_dict + + def get_chunk_spec( + self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + ) -> ArraySpec: + return ArraySpec( + shape=self.chunks, + dtype=self.dtype, + fill_value=self.fill_value, + order=order, + prototype=prototype, + ) + + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + chunk_identifier = self.dimension_separator.join(map(str, chunk_coords)) + return "0" if chunk_identifier == "" else chunk_identifier + + def update_shape(self, shape: ChunkCoords) -> Self: + return replace(self, shape=shape) + + def update_attributes(self, attributes: dict[str, JSON]) -> Self: + return replace(self, attributes=attributes) + + +def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]: + return np.dtype(data) + + +def parse_zarr_format(data: object) -> Literal[2]: + if data == 2: + return 2 + raise ValueError(f"Invalid value. Expected 2. Got {data}.") + + +def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None: + """ + Parse a potential tuple of filters + """ + out: list[numcodecs.abc.Codec] = [] + + if data is None: + return data + if isinstance(data, Iterable): + for idx, val in enumerate(data): + if isinstance(val, numcodecs.abc.Codec): + out.append(val) + elif isinstance(val, dict): + out.append(numcodecs.get_codec(val)) + else: + msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead." + raise TypeError(msg) + return tuple(out) + msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead." + raise TypeError(msg) + + +def parse_compressor(data: object) -> numcodecs.abc.Codec | None: + """ + Parse a potential compressor. + """ + if data is None or isinstance(data, numcodecs.abc.Codec): + return data + if isinstance(data, dict): + return numcodecs.get_codec(data) + msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead." + raise ValueError(msg) + + +def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: + if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)): + msg = ( + f"The `shape` and `chunks` attributes must have the same length. " + f"`chunks` has length {l_chunks}, but `shape` has length {l_shape}." + ) + raise ValueError(msg) + return data + + +def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: + """ + Parse a potential fill value into a value that is compatible with the provided dtype. + + Parameters + ---------- + fill_value: Any + A potential fill value. + dtype: np.dtype[Any] + A numpy dtype. 
+ + Returns + An instance of `dtype`, or `None`, or any python object (in the case of an object dtype) + """ + + if fill_value is None or dtype.hasobject: + # no fill value + pass + elif not isinstance(fill_value, np.void) and fill_value == 0: + # this should be compatible across numpy versions for any array type, including + # structured arrays + fill_value = np.zeros((), dtype=dtype)[()] + + elif dtype.kind == "U": + # special case unicode because of encoding issues on Windows if passed through numpy + # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 + + if not isinstance(fill_value, str): + raise ValueError( + f"fill_value {fill_value!r} is not valid for dtype {dtype}; must be a unicode string" + ) + else: + try: + if isinstance(fill_value, bytes) and dtype.kind == "V": + # special case for numpy 1.14 compatibility + fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()] + else: + fill_value = np.array(fill_value, dtype=dtype)[()] + + except Exception as e: + msg = f"Fill_value {fill_value} is not valid for dtype {dtype}." + raise ValueError(msg) from e + + return fill_value + + +def _default_fill_value(dtype: np.dtype[Any]) -> Any: + """ + Get the default fill value for a type. + + Notes + ----- + This differs from :func:`parse_fill_value`, which parses a fill value + stored in the Array metadata into an in-memory value. This only gives + the default fill value for some type. + + This is useful for reading Zarr V2 arrays, which allow the fill + value to be unspecified. + """ + if dtype.kind == "S": + return b"" + elif dtype.kind in "UO": + return "" + else: + return dtype.type(0) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py new file mode 100644 index 0000000000..b85932ef82 --- /dev/null +++ b/src/zarr/core/metadata/v3.py @@ -0,0 +1,631 @@ +from __future__ import annotations + +import warnings +from typing import TYPE_CHECKING, TypedDict, overload + +from zarr.abc.metadata import Metadata +from zarr.core.buffer.core import default_buffer_prototype + +if TYPE_CHECKING: + from typing import Self + + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.chunk_grids import ChunkGrid + from zarr.core.common import JSON, ChunkCoords + +import json +from collections.abc import Iterable, Sequence +from dataclasses import dataclass, field, replace +from enum import Enum +from typing import Any, Literal, cast + +import numcodecs.abc +import numpy as np +import numpy.typing as npt + +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec +from zarr.core.array_spec import ArraySpec +from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid +from zarr.core.chunk_key_encodings import ChunkKeyEncoding +from zarr.core.common import ( + JSON, + ZARR_JSON, + ChunkCoords, + parse_named_configuration, + parse_shapelike, +) +from zarr.core.config import config +from zarr.core.metadata.common import parse_attributes +from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE +from zarr.errors import MetadataValidationError, NodeTypeValidationError +from zarr.registry import get_codec_class + +DEFAULT_DTYPE = "float64" + + +def parse_zarr_format(data: object) -> Literal[3]: + if data == 3: + return 3 + raise MetadataValidationError("zarr_format", 3, data) + + +def parse_node_type_array(data: object) -> Literal["array"]: + if data == "array": + return "array" + raise NodeTypeValidationError("node_type", "array", data) + + +def parse_codecs(data: object) -> tuple[Codec, ...]: + out: tuple[Codec, ...] 
= () + + if not isinstance(data, Iterable): + raise TypeError(f"Expected iterable, got {type(data)}") + + for c in data: + if isinstance( + c, ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec + ): # Can't use Codec here because of mypy limitation + out += (c,) + else: + name_parsed, _ = parse_named_configuration(c, require_configuration=False) + out += (get_codec_class(name_parsed).from_dict(c),) + + return out + + +def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None: + """Check that the codecs are valid for the given dtype""" + + # ensure that we have at least one ArrayBytesCodec + abcs: list[ArrayBytesCodec] = [] + for codec in codecs: + if isinstance(codec, ArrayBytesCodec): + abcs.append(codec) + if len(abcs) == 0: + raise ValueError("At least one ArrayBytesCodec is required.") + elif len(abcs) > 1: + raise ValueError("Only one ArrayBytesCodec is allowed.") + + abc = abcs[0] + + # we need to have special codecs if we are decoding vlen strings or bytestrings + # TODO: use codec ID instead of class name + codec_id = abc.__class__.__name__ + if dtype == DataType.string and not codec_id == "VLenUTF8Codec": + raise ValueError( + f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`." + ) + if dtype == DataType.bytes and not codec_id == "VLenBytesCodec": + raise ValueError( + f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`." + ) + + +def parse_dimension_names(data: object) -> tuple[str | None, ...] | None: + if data is None: + return data + elif isinstance(data, Iterable) and all(isinstance(x, type(None) | str) for x in data): + return tuple(data) + else: + msg = f"Expected either None or a iterable of str, got {type(data)}" + raise TypeError(msg) + + +def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: + """ + Parse storage_transformers. Zarr python cannot use storage transformers + at this time, so this function doesn't attempt to validate them. + """ + if data is None: + return () + if isinstance(data, Iterable): + if len(tuple(data)) >= 1: + return data # type: ignore[return-value] + else: + return () + raise TypeError( + f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." + ) + + +class V3JsonEncoder(json.JSONEncoder): + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.indent = kwargs.pop("indent", config.get("json_indent")) + super().__init__(*args, **kwargs) + + def default(self, o: object) -> Any: + if isinstance(o, np.dtype): + return str(o) + if np.isscalar(o): + out: Any + if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"): + # https://github.com/zarr-developers/zarr-python/issues/2119 + # `.item()` on a datetime type might or might not return an + # integer, depending on the value. 
+ # Explicitly cast to an int first, and then grab .item() + out = o.view("i8").item() + else: + # convert numpy scalar to python type, and pass + # python types through + out = getattr(o, "item", lambda: o)() + if isinstance(out, complex): + # python complex types are not JSON serializable, so we use the + # serialization defined in the zarr v3 spec + return [out.real, out.imag] + elif np.isnan(out): + return "NaN" + elif np.isinf(out): + return "Infinity" if out > 0 else "-Infinity" + return out + elif isinstance(o, Enum): + return o.name + # this serializes numcodecs compressors + # todo: implement to_dict for codecs + elif isinstance(o, numcodecs.abc.Codec): + config: dict[str, Any] = o.get_config() + return config + else: + return super().default(o) + + +def _replace_special_floats(obj: object) -> Any: + """Helper function to replace NaN/Inf/-Inf values with special strings + + Note: this cannot be done in the V3JsonEncoder because Python's `json.dumps` optimistically + converts NaN/Inf values to special types outside of the encoding step. + """ + if isinstance(obj, float): + if np.isnan(obj): + return "NaN" + elif np.isinf(obj): + return "Infinity" if obj > 0 else "-Infinity" + elif isinstance(obj, dict): + # Recursively replace in dictionaries + return {k: _replace_special_floats(v) for k, v in obj.items()} + elif isinstance(obj, list): + # Recursively replace in lists + return [_replace_special_floats(item) for item in obj] + return obj + + +class ArrayV3MetadataDict(TypedDict): + """ + A typed dictionary model for zarr v3 metadata. + """ + + zarr_format: Literal[3] + attributes: dict[str, JSON] + + +@dataclass(frozen=True, kw_only=True) +class ArrayV3Metadata(Metadata): + shape: ChunkCoords + data_type: DataType + chunk_grid: ChunkGrid + chunk_key_encoding: ChunkKeyEncoding + fill_value: Any + codecs: tuple[Codec, ...] + attributes: dict[str, Any] = field(default_factory=dict) + dimension_names: tuple[str, ...] | None = None + zarr_format: Literal[3] = field(default=3, init=False) + node_type: Literal["array"] = field(default="array", init=False) + storage_transformers: tuple[dict[str, JSON], ...] + + def __init__( + self, + *, + shape: Iterable[int], + data_type: npt.DTypeLike | DataType, + chunk_grid: dict[str, JSON] | ChunkGrid, + chunk_key_encoding: dict[str, JSON] | ChunkKeyEncoding, + fill_value: Any, + codecs: Iterable[Codec | dict[str, JSON]], + attributes: None | dict[str, JSON], + dimension_names: None | Iterable[str], + storage_transformers: None | Iterable[dict[str, JSON]] = None, + ) -> None: + """ + Because the class is a frozen dataclass, we set attributes using object.__setattr__ + """ + shape_parsed = parse_shapelike(shape) + data_type_parsed = DataType.parse(data_type) + chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) + chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) + dimension_names_parsed = parse_dimension_names(dimension_names) + if fill_value is None: + fill_value = default_fill_value(data_type_parsed) + # we pass a string here rather than an enum to make mypy happy + fill_value_parsed = parse_fill_value( + fill_value, dtype=cast(ALL_DTYPES, data_type_parsed.value) + ) + attributes_parsed = parse_attributes(attributes) + codecs_parsed_partial = parse_codecs(codecs) + storage_transformers_parsed = parse_storage_transformers(storage_transformers) + + array_spec = ArraySpec( + shape=shape_parsed, + dtype=data_type_parsed.to_numpy(), + fill_value=fill_value_parsed, + order="C", # TODO: order is not needed here. 
+ prototype=default_buffer_prototype(), # TODO: prototype is not needed here. + ) + codecs_parsed = [c.evolve_from_array_spec(array_spec) for c in codecs_parsed_partial] + validate_codecs(codecs_parsed_partial, data_type_parsed) + + object.__setattr__(self, "shape", shape_parsed) + object.__setattr__(self, "data_type", data_type_parsed) + object.__setattr__(self, "chunk_grid", chunk_grid_parsed) + object.__setattr__(self, "chunk_key_encoding", chunk_key_encoding_parsed) + object.__setattr__(self, "codecs", codecs_parsed) + object.__setattr__(self, "dimension_names", dimension_names_parsed) + object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "attributes", attributes_parsed) + object.__setattr__(self, "storage_transformers", storage_transformers_parsed) + + self._validate_metadata() + + def _validate_metadata(self) -> None: + if isinstance(self.chunk_grid, RegularChunkGrid) and len(self.shape) != len( + self.chunk_grid.chunk_shape + ): + raise ValueError( + "`chunk_shape` and `shape` need to have the same number of dimensions." + ) + if self.dimension_names is not None and len(self.shape) != len(self.dimension_names): + raise ValueError( + "`dimension_names` and `shape` need to have the same number of dimensions." + ) + if self.fill_value is None: + raise ValueError("`fill_value` is required.") + for codec in self.codecs: + codec.validate( + shape=self.shape, dtype=self.data_type.to_numpy(), chunk_grid=self.chunk_grid + ) + + @property + def dtype(self) -> np.dtype[Any]: + """Interpret Zarr dtype as NumPy dtype""" + return self.data_type.to_numpy() + + @property + def ndim(self) -> int: + return len(self.shape) + + def get_chunk_spec( + self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + ) -> ArraySpec: + assert isinstance( + self.chunk_grid, RegularChunkGrid + ), "Currently, only regular chunk grid is supported" + return ArraySpec( + shape=self.chunk_grid.chunk_shape, + dtype=self.dtype, + fill_value=self.fill_value, + order=order, + prototype=prototype, + ) + + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + return self.chunk_key_encoding.encode_chunk_key(chunk_coords) + + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: + d = _replace_special_floats(self.to_dict()) + return {ZARR_JSON: prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode())} + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + # make a copy because we are modifying the dict + _data = data.copy() + + # check that the zarr_format attribute is correct + _ = parse_zarr_format(_data.pop("zarr_format")) + # check that the node_type attribute is correct + _ = parse_node_type_array(_data.pop("node_type")) + + # check that the data_type attribute is valid + data_type = DataType.parse(_data.pop("data_type")) + + # dimension_names key is optional, normalize missing to `None` + _data["dimension_names"] = _data.pop("dimension_names", None) + # attributes key is optional, normalize missing to `None` + _data["attributes"] = _data.pop("attributes", None) + return cls(**_data, data_type=data_type) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + out_dict = super().to_dict() + + if not isinstance(out_dict, dict): + raise TypeError(f"Expected dict. 
Got {type(out_dict)}.") + + # if `dimension_names` is `None`, we do not include it in + # the metadata document + if out_dict["dimension_names"] is None: + out_dict.pop("dimension_names") + return out_dict + + def update_shape(self, shape: ChunkCoords) -> Self: + return replace(self, shape=shape) + + def update_attributes(self, attributes: dict[str, JSON]) -> Self: + return replace(self, attributes=attributes) + + +# enum Literals can't be used in typing, so we have to restate all of the V3 dtypes as types +# https://github.com/python/typing/issues/781 + +BOOL_DTYPE = Literal["bool"] +BOOL = np.bool_ +INTEGER_DTYPE = Literal["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"] +INTEGER = np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 +FLOAT_DTYPE = Literal["float16", "float32", "float64"] +FLOAT = np.float16 | np.float32 | np.float64 +COMPLEX_DTYPE = Literal["complex64", "complex128"] +COMPLEX = np.complex64 | np.complex128 +STRING_DTYPE = Literal["string"] +STRING = np.str_ +BYTES_DTYPE = Literal["bytes"] +BYTES = np.bytes_ + +ALL_DTYPES = BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | BYTES_DTYPE + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: BOOL_DTYPE, +) -> BOOL: ... + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: INTEGER_DTYPE, +) -> INTEGER: ... + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: FLOAT_DTYPE, +) -> FLOAT: ... + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: COMPLEX_DTYPE, +) -> COMPLEX: ... + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: STRING_DTYPE, +) -> STRING: ... + + +@overload +def parse_fill_value( + fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, + dtype: BYTES_DTYPE, +) -> BYTES: ... + + +def parse_fill_value( + fill_value: Any, + dtype: ALL_DTYPES, +) -> Any: + """ + Parse `fill_value`, a potential fill value, into an instance of `dtype`, a data type. + If `fill_value` is `None`, then this function will return the result of casting the value 0 + to the provided data type. Otherwise, `fill_value` will be cast to the provided data type. + + Note that some numpy dtypes use very permissive casting rules. For example, + `np.bool_({'not remotely a bool'})` returns `True`. Thus this function should not be used for + validating that the provided fill value is a valid instance of the data type. + + Parameters + ---------- + fill_value: Any + A potential fill value. + dtype: str + A valid Zarr V3 DataType. 
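A few illustrative calls to `parse_fill_value`, following the casting rules described in this docstring (the dtype arguments are Zarr V3 data type strings):

    parse_fill_value(0, "float64")              # -> np.float64(0.0)
    parse_fill_value(1, "bool")                 # -> np.True_
    parse_fill_value([1.0, 2.0], "complex64")   # -> np.complex64(1+2j), the two-element JSON form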
+ + Returns + ------- + A scalar instance of `dtype` + """ + data_type = DataType(dtype) + if fill_value is None: + raise ValueError("Fill value cannot be None") + if data_type == DataType.string: + return np.str_(fill_value) + if data_type == DataType.bytes: + return np.bytes_(fill_value) + + # the rest are numeric types + np_dtype = cast(np.dtype[np.generic], data_type.to_numpy()) + + if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): + if data_type in (DataType.complex64, DataType.complex128): + if len(fill_value) == 2: + # complex datatypes serialize to JSON arrays with two elements + return np_dtype.type(complex(*fill_value)) + else: + msg = ( + f"Got an invalid fill value for complex data type {data_type.value}." + f"Expected a sequence with 2 elements, but {fill_value!r} has " + f"length {len(fill_value)}." + ) + raise ValueError(msg) + msg = f"Cannot parse non-string sequence {fill_value!r} as a scalar with type {data_type.value}." + raise TypeError(msg) + + # Cast the fill_value to the given dtype + try: + # This warning filter can be removed after Zarr supports numpy>=2.0 + # The warning is saying that the future behavior of out of bounds casting will be to raise + # an OverflowError. In the meantime, we allow overflow and catch cases where + # fill_value != casted_value below. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + casted_value = np.dtype(np_dtype).type(fill_value) + except (ValueError, OverflowError, TypeError) as e: + raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}") from e + # Check if the value is still representable by the dtype + if fill_value == "NaN" and np.isnan(casted_value): + pass + elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): + pass + elif np_dtype.kind in "cf": + # float comparison is not exact, especially when dtype str | bytes | np.generic: + if dtype == DataType.string: + return "" + elif dtype == DataType.bytes: + return b"" + else: + np_dtype = dtype.to_numpy() + np_dtype = cast(np.dtype[np.generic], np_dtype) + return np_dtype.type(0) + + +# For type checking +_bool = bool + + +class DataType(Enum): + bool = "bool" + int8 = "int8" + int16 = "int16" + int32 = "int32" + int64 = "int64" + uint8 = "uint8" + uint16 = "uint16" + uint32 = "uint32" + uint64 = "uint64" + float16 = "float16" + float32 = "float32" + float64 = "float64" + complex64 = "complex64" + complex128 = "complex128" + string = "string" + bytes = "bytes" + + @property + def byte_count(self) -> None | int: + data_type_byte_counts = { + DataType.bool: 1, + DataType.int8: 1, + DataType.int16: 2, + DataType.int32: 4, + DataType.int64: 8, + DataType.uint8: 1, + DataType.uint16: 2, + DataType.uint32: 4, + DataType.uint64: 8, + DataType.float16: 2, + DataType.float32: 4, + DataType.float64: 8, + DataType.complex64: 8, + DataType.complex128: 16, + } + try: + return data_type_byte_counts[self] + except KeyError: + # string and bytes have variable length + return None + + @property + def has_endianness(self) -> _bool: + return self.byte_count is not None and self.byte_count != 1 + + def to_numpy_shortname(self) -> str: + data_type_to_numpy = { + DataType.bool: "bool", + DataType.int8: "i1", + DataType.int16: "i2", + DataType.int32: "i4", + DataType.int64: "i8", + DataType.uint8: "u1", + DataType.uint16: "u2", + DataType.uint32: "u4", + DataType.uint64: "u8", + DataType.float16: "f2", + DataType.float32: "f4", + DataType.float64: "f8", + DataType.complex64: 
"c8", + DataType.complex128: "c16", + } + return data_type_to_numpy[self] + + def to_numpy(self) -> np.dtypes.StringDType | np.dtypes.ObjectDType | np.dtype[np.generic]: + # note: it is not possible to round trip DataType <-> np.dtype + # due to the fact that DataType.string and DataType.bytes both + # generally return np.dtype("O") from this function, even though + # they can originate as fixed-length types (e.g. " DataType: + if dtype.kind in "UT": + return DataType.string + elif dtype.kind == "S": + return DataType.bytes + dtype_to_data_type = { + "|b1": "bool", + "bool": "bool", + "|i1": "int8", + " DataType: + if dtype is None: + return DataType[DEFAULT_DTYPE] + if isinstance(dtype, DataType): + return dtype + try: + return DataType(dtype) + except ValueError: + pass + try: + dtype = np.dtype(dtype) + except (ValueError, TypeError) as e: + raise ValueError(f"Invalid V3 data_type: {dtype}") from e + # check that this is a valid v3 data_type + try: + data_type = DataType.from_numpy(dtype) + except KeyError as e: + raise ValueError(f"Invalid V3 data_type: {dtype}") from e + return data_type diff --git a/src/zarr/core/strings.py b/src/zarr/core/strings.py new file mode 100644 index 0000000000..9ec391c04a --- /dev/null +++ b/src/zarr/core/strings.py @@ -0,0 +1,87 @@ +"""This module contains utilities for working with string arrays across +different versions of Numpy. +""" + +from typing import Any, Union, cast +from warnings import warn + +import numpy as np + +# _STRING_DTYPE is the in-memory datatype that will be used for V3 string arrays +# when reading data back from Zarr. +# Any valid string-like datatype should be fine for *setting* data. + +_STRING_DTYPE: Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"] +_NUMPY_SUPPORTS_VLEN_STRING: bool + + +def cast_array( + data: np.ndarray[Any, np.dtype[Any]], +) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: + raise NotImplementedError + + +try: + # this new vlen string dtype was added in NumPy 2.0 + _STRING_DTYPE = np.dtypes.StringDType() + _NUMPY_SUPPORTS_VLEN_STRING = True + + def cast_array( + data: np.ndarray[Any, np.dtype[Any]], + ) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: + out = data.astype(_STRING_DTYPE, copy=False) + return cast(np.ndarray[Any, np.dtypes.StringDType], out) + +except AttributeError: + # if not available, we fall back on an object array of strings, as in Zarr < 3 + _STRING_DTYPE = np.dtypes.ObjectDType() + _NUMPY_SUPPORTS_VLEN_STRING = False + + def cast_array( + data: np.ndarray[Any, np.dtype[Any]], + ) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: + out = data.astype(_STRING_DTYPE, copy=False) + return cast(np.ndarray[Any, np.dtypes.ObjectDType], out) + + +def cast_to_string_dtype( + data: np.ndarray[Any, np.dtype[Any]], safe: bool = False +) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: + """Take any data and attempt to cast to to our preferred string dtype. + + data : np.ndarray + The data to cast + + safe : bool + If True, do not issue a warning if the data is cast from object to string dtype. + + """ + if np.issubdtype(data.dtype, np.str_): + # legacy fixed-width string type (e.g. 
"= 2.", + stacklevel=2, + ) + return cast_array(data) + raise ValueError(f"Cannot cast dtype {data.dtype} to string dtype") diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py new file mode 100644 index 0000000000..20f04f543b --- /dev/null +++ b/src/zarr/core/sync.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import asyncio +import atexit +import logging +import threading +from concurrent.futures import ThreadPoolExecutor, wait +from typing import TYPE_CHECKING, TypeVar + +from typing_extensions import ParamSpec + +from zarr.core.config import config + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Coroutine + from typing import Any + +logger = logging.getLogger(__name__) + + +P = ParamSpec("P") +T = TypeVar("T") + +# From https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py + +iothread: list[threading.Thread | None] = [None] # dedicated IO thread +loop: list[asyncio.AbstractEventLoop | None] = [ + None +] # global event loop for any non-async instance +_lock: threading.Lock | None = None # global lock placeholder +_executor: ThreadPoolExecutor | None = None # global executor placeholder + + +class SyncError(Exception): + pass + + +def _get_lock() -> threading.Lock: + """Allocate or return a threading lock. + + The lock is allocated on first use to allow setting one lock per forked process. + """ + global _lock + if not _lock: + _lock = threading.Lock() + return _lock + + +def _get_executor() -> ThreadPoolExecutor: + """Return Zarr Thread Pool Executor + + The executor is allocated on first use. + """ + global _executor + if not _executor: + max_workers = config.get("threading.max_workers", None) + print(max_workers) + # if max_workers is not None and max_workers > 0: + # raise ValueError(max_workers) + _executor = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="zarr_pool") + _get_loop().set_default_executor(_executor) + return _executor + + +def cleanup_resources() -> None: + global _executor + if _executor: + _executor.shutdown(wait=True, cancel_futures=True) + _executor = None + + if loop[0] is not None: + with _get_lock(): + # Stop the event loop safely + loop[0].call_soon_threadsafe(loop[0].stop) # Stop loop from another thread + if iothread[0] is not None: + iothread[0].join(timeout=0.2) # Add a timeout to avoid hanging + + if iothread[0].is_alive(): + logger.warning( + "Thread did not finish cleanly; forcefully closing the event loop." + ) + + # Forcefully close the event loop to release resources + loop[0].close() + + # dereference the loop and iothread + loop[0] = None + iothread[0] = None + + +atexit.register(cleanup_resources) + + +async def _runner(coro: Coroutine[Any, Any, T]) -> T | BaseException: + """ + Await a coroutine and return the result of running it. If awaiting the coroutine raises an + exception, the exception will be returned. + """ + try: + return await coro + except Exception as ex: + return ex + + +def sync( + coro: Coroutine[Any, Any, T], + loop: asyncio.AbstractEventLoop | None = None, + timeout: float | None = None, +) -> T: + """ + Make loop run coroutine until it returns. 
Runs in other thread + + Examples + -------- + >>> sync(async_function(), existing_loop) + """ + if loop is None: + # NB: if the loop is not running *yet*, it is OK to submit work + # and we will wait for it + loop = _get_loop() + if not isinstance(loop, asyncio.AbstractEventLoop): + raise TypeError(f"loop cannot be of type {type(loop)}") + if loop.is_closed(): + raise RuntimeError("Loop is not running") + try: + loop0 = asyncio.events.get_running_loop() + if loop0 is loop: + raise SyncError("Calling sync() from within a running loop") + except RuntimeError: + pass + + future = asyncio.run_coroutine_threadsafe(_runner(coro), loop) + + finished, unfinished = wait([future], return_when=asyncio.ALL_COMPLETED, timeout=timeout) + if len(unfinished) > 0: + raise TimeoutError(f"Coroutine {coro} failed to finish in within {timeout}s") + assert len(finished) == 1 + return_result = next(iter(finished)).result() + + if isinstance(return_result, BaseException): + raise return_result + else: + return return_result + + +def _get_loop() -> asyncio.AbstractEventLoop: + """Create or return the default fsspec IO loop + + The loop will be running on a separate thread. + """ + if loop[0] is None: + with _get_lock(): + # repeat the check just in case the loop got filled between the + # previous two calls from another thread + if loop[0] is None: + new_loop = asyncio.new_event_loop() + loop[0] = new_loop + iothread[0] = threading.Thread(target=new_loop.run_forever, name="zarr_io") + assert iothread[0] is not None + iothread[0].daemon = True + iothread[0].start() + assert loop[0] is not None + return loop[0] + + +async def _collect_aiterator(data: AsyncIterator[T]) -> tuple[T, ...]: + """ + Collect an entire async iterator into a tuple + """ + result = [x async for x in data] + return tuple(result) + + +def collect_aiterator(data: AsyncIterator[T]) -> tuple[T, ...]: + """ + Synchronously collect an entire async iterator into a tuple. 
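A brief usage sketch for `collect_aiterator`, assuming it is imported from this module (zarr.core.sync) and called from ordinary synchronous code:

    from zarr.core.sync import collect_aiterator

    async def numbers():
        yield 1
        yield 2

    collect_aiterator(numbers())   # -> (1, 2), driven on the dedicated zarr IO loop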
+ """ + return sync(_collect_aiterator(data)) + + +class SyncMixin: + def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: + # TODO: refactor this to to take *args and **kwargs and pass those to the method + # this should allow us to better type the sync wrapper + return sync( + coroutine, + timeout=config.get("async.timeout"), + ) + + def _sync_iter(self, async_iterator: AsyncIterator[T]) -> list[T]: + async def iter_to_list() -> list[T]: + return [item async for item in async_iterator] + + return self._sync(iter_to_list()) diff --git a/src/zarr/creation.py b/src/zarr/creation.py new file mode 100644 index 0000000000..63f93ba6f6 --- /dev/null +++ b/src/zarr/creation.py @@ -0,0 +1,37 @@ +import warnings + +from zarr.api.synchronous import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) + +__all__ = [ + "array", + "create", + "empty", + "empty_like", + "full", + "full_like", + "ones", + "ones_like", + "open_array", + "open_like", + "zeros", + "zeros_like", +] + +warnings.warn( + "zarr.creation is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) diff --git a/src/zarr/errors.py b/src/zarr/errors.py new file mode 100644 index 0000000000..e6d416bcc6 --- /dev/null +++ b/src/zarr/errors.py @@ -0,0 +1,47 @@ +from typing import Any + + +class _BaseZarrError(ValueError): + _msg = "" + + def __init__(self, *args: Any) -> None: + super().__init__(self._msg.format(*args)) + + +class ContainsGroupError(_BaseZarrError): + _msg = "A group exists in store {0!r} at path {1!r}." + + +class ContainsArrayError(_BaseZarrError): + _msg = "An array exists in store {0!r} at path {1!r}." + + +class ContainsArrayAndGroupError(_BaseZarrError): + _msg = ( + "Array and group metadata documents (.zarray and .zgroup) were both found in store " + "{0!r} at path {1!r}." + "Only one of these files may be present in a given directory / prefix. " + "Remove the .zarray file, or the .zgroup file, or both." + ) + + +class MetadataValidationError(_BaseZarrError): + """An exception raised when the Zarr metadata is invalid in some way""" + + _msg = "Invalid value for '{}'. Expected '{}'. Got '{}'." + + +class NodeTypeValidationError(MetadataValidationError): + """ + Specialized exception when the node_type of the metadata document is incorrect.. + + This can be raised when the value is invalid or unexpected given the context, + for example an 'array' node when we expected a 'group'. 
+ """ + + +__all__ = [ + "ContainsArrayAndGroupError", + "ContainsArrayError", + "ContainsGroupError", +] diff --git a/src/zarr/py.typed b/src/zarr/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/zarr/registry.py b/src/zarr/registry.py new file mode 100644 index 0000000000..fcea834f04 --- /dev/null +++ b/src/zarr/registry.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +import warnings +from collections import defaultdict +from importlib.metadata import entry_points as get_entry_points +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +from zarr.core.config import BadConfigError, config + +if TYPE_CHECKING: + from importlib.metadata import EntryPoint + + from zarr.abc.codec import Codec, CodecPipeline + from zarr.core.buffer import Buffer, NDBuffer + +__all__ = [ + "Registry", + "get_buffer_class", + "get_codec_class", + "get_ndbuffer_class", + "get_pipeline_class", + "register_buffer", + "register_codec", + "register_ndbuffer", + "register_pipeline", +] + +T = TypeVar("T") + + +class Registry(dict[str, type[T]], Generic[T]): + def __init__(self) -> None: + super().__init__() + self.lazy_load_list: list[EntryPoint] = [] + + def lazy_load(self) -> None: + for e in self.lazy_load_list: + self.register(e.load()) + self.lazy_load_list.clear() + + def register(self, cls: type[T]) -> None: + self[fully_qualified_name(cls)] = cls + + +__codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) +__pipeline_registry: Registry[CodecPipeline] = Registry() +__buffer_registry: Registry[Buffer] = Registry() +__ndbuffer_registry: Registry[NDBuffer] = Registry() + +""" +The registry module is responsible for managing implementations of codecs, pipelines, buffers and ndbuffers and +collecting them from entrypoints. +The implementation used is determined by the config +""" + + +def _collect_entrypoints() -> list[Registry[Any]]: + """ + Collects codecs, pipelines, buffers and ndbuffers from entrypoints. + Entry points can either be single items or groups of items. + Allowed syntax for entry_points.txt is e.g. + + [zarr.codecs] + gzip = package:EntrypointGzipCodec1 + [zarr.codecs.gzip] + some_name = package:EntrypointGzipCodec2 + another = package:EntrypointGzipCodec3 + + [zarr] + buffer = package:TestBuffer1 + [zarr.buffer] + xyz = package:TestBuffer2 + abc = package:TestBuffer3 + ... + """ + entry_points = get_entry_points() + + __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.buffer")) + __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer")) + __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) + __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) + __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) + __pipeline_registry.lazy_load_list.extend( + entry_points.select(group="zarr", name="codec_pipeline") + ) + for e in entry_points.select(group="zarr.codecs"): + __codec_registries[e.name].lazy_load_list.append(e) + for group in entry_points.groups: + if group.startswith("zarr.codecs."): + codec_name = group.split(".")[2] + __codec_registries[codec_name].lazy_load_list.extend(entry_points.select(group=group)) + return [ + *__codec_registries.values(), + __pipeline_registry, + __buffer_registry, + __ndbuffer_registry, + ] + + +def _reload_config() -> None: + config.refresh() + + +def fully_qualified_name(cls: type) -> str: + module = cls.__module__ + return module + "." 
+ cls.__qualname__ + + +def register_codec(key: str, codec_cls: type[Codec]) -> None: + if key not in __codec_registries: + __codec_registries[key] = Registry() + __codec_registries[key].register(codec_cls) + + +def register_pipeline(pipe_cls: type[CodecPipeline]) -> None: + __pipeline_registry.register(pipe_cls) + + +def register_ndbuffer(cls: type[NDBuffer]) -> None: + __ndbuffer_registry.register(cls) + + +def register_buffer(cls: type[Buffer]) -> None: + __buffer_registry.register(cls) + + +def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: + if reload_config: + _reload_config() + + if key in __codec_registries: + # logger.debug("Auto loading codec '%s' from entrypoint", codec_id) + __codec_registries[key].lazy_load() + + codec_classes = __codec_registries[key] + if not codec_classes: + raise KeyError(key) + + config_entry = config.get("codecs", {}).get(key) + if config_entry is None: + warnings.warn( + f"Codec '{key}' not configured in config. Selecting any implementation.", stacklevel=2 + ) + return list(codec_classes.values())[-1] + selected_codec_cls = codec_classes[config_entry] + + if selected_codec_cls: + return selected_codec_cls + raise KeyError(key) + + +def get_pipeline_class(reload_config: bool = False) -> type[CodecPipeline]: + if reload_config: + _reload_config() + __pipeline_registry.lazy_load() + path = config.get("codec_pipeline.path") + pipeline_class = __pipeline_registry.get(path) + if pipeline_class: + return pipeline_class + raise BadConfigError( + f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry)}." + ) + + +def get_buffer_class(reload_config: bool = False) -> type[Buffer]: + if reload_config: + _reload_config() + __buffer_registry.lazy_load() + + path = config.get("buffer") + buffer_class = __buffer_registry.get(path) + if buffer_class: + return buffer_class + raise BadConfigError( + f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry)}." + ) + + +def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: + if reload_config: + _reload_config() + __ndbuffer_registry.lazy_load() + path = config.get("ndbuffer") + ndbuffer_class = __ndbuffer_registry.get(path) + if ndbuffer_class: + return ndbuffer_class + raise BadConfigError( + f"NDBuffer class '{path}' not found in registered buffers: {list(__ndbuffer_registry)}." 
+ ) + + +_collect_entrypoints() diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py new file mode 100644 index 0000000000..6703aa2723 --- /dev/null +++ b/src/zarr/storage/__init__.py @@ -0,0 +1,17 @@ +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.logging import LoggingStore +from zarr.storage.memory import MemoryStore +from zarr.storage.remote import RemoteStore +from zarr.storage.zip import ZipStore + +__all__ = [ + "LocalStore", + "LoggingStore", + "MemoryStore", + "RemoteStore", + "StoreLike", + "StorePath", + "ZipStore", + "make_store_path", +] diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py new file mode 100644 index 0000000000..cbc9c42bbd --- /dev/null +++ b/src/zarr/storage/_utils.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from zarr.core.buffer import Buffer + + +def _normalize_interval_index( + data: Buffer, interval: None | tuple[int | None, int | None] +) -> tuple[int, int]: + """ + Convert an implicit interval into an explicit start and length + """ + if interval is None: + start = 0 + length = len(data) + else: + maybe_start, maybe_len = interval + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(data) - start + else: + length = maybe_len + + return (start, length) diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py new file mode 100644 index 0000000000..101e8f38af --- /dev/null +++ b/src/zarr/storage/common.py @@ -0,0 +1,434 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import TYPE_CHECKING, Any, Literal + +from zarr.abc.store import ByteRangeRequest, Store +from zarr.core.buffer import Buffer, default_buffer_prototype +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat +from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore + +# from zarr.store.remote import RemoteStore + +if TYPE_CHECKING: + from zarr.core.buffer import BufferPrototype + from zarr.core.common import AccessModeLiteral + + +def _dereference_path(root: str, path: str) -> str: + assert isinstance(root, str) + assert isinstance(path, str) + root = root.rstrip("/") + path = f"{root}/{path}" if root else path + return path.rstrip("/") + + +class StorePath: + """ + Path-like interface for a Store. + + Parameters + ---------- + store : Store + The store to use. + path : str + The path within the store. + """ + + store: Store + path: str + + def __init__(self, store: Store, path: str | None = None) -> None: + self.store = store + self.path = path or "" + + async def get( + self, + prototype: BufferPrototype | None = None, + byte_range: ByteRangeRequest | None = None, + ) -> Buffer | None: + """ + Read bytes from the store. + + Parameters + ---------- + prototype : BufferPrototype, optional + The buffer prototype to use when reading the bytes. + byte_range : ByteRangeRequest, optional + The range of bytes to read. + + Returns + ------- + buffer : Buffer or None + The read bytes, or None if the key does not exist. 
+ """ + if prototype is None: + prototype = default_buffer_prototype() + return await self.store.get(self.path, prototype=prototype, byte_range=byte_range) + + async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: + """ + Write bytes to the store. + + Parameters + ---------- + value : Buffer + The buffer to write. + byte_range : ByteRangeRequest, optional + The range of bytes to write. If None, the entire buffer is written. + + Raises + ------ + NotImplementedError + If `byte_range` is not None, because Store.set does not support partial writes yet. + """ + if byte_range is not None: + raise NotImplementedError("Store.set does not have partial writes yet") + await self.store.set(self.path, value) + + async def delete(self) -> None: + """ + Delete the key from the store. + + Raises + ------ + NotImplementedError + If the store does not support deletion. + """ + await self.store.delete(self.path) + + async def set_if_not_exists(self, default: Buffer) -> None: + """ + Store a key to ``value`` if the key is not already present. + + Parameters + ---------- + default : Buffer + The buffer to store if the key is not already present. + """ + await self.store.set_if_not_exists(self.path, default) + + async def exists(self) -> bool: + """ + Check if the key exists in the store. + + Returns + ------- + bool + True if the key exists in the store, False otherwise. + """ + return await self.store.exists(self.path) + + def __truediv__(self, other: str) -> StorePath: + """combine this store path with another path""" + return self.__class__(self.store, _dereference_path(self.path, other)) + + def __str__(self) -> str: + return _dereference_path(str(self.store), self.path) + + def __repr__(self) -> str: + return f"StorePath({self.store.__class__.__name__}, {str(self)!r})" + + def __eq__(self, other: object) -> bool: + """ + Check if two StorePath objects are equal. + + Returns + ------- + bool + True if the two objects are equal, False otherwise. + + Notes + ----- + Two StorePath objects are considered equal if their stores are equal + and their paths are equal. + """ + try: + return self.store == other.store and self.path == other.path # type: ignore[attr-defined, no-any-return] + except Exception: + pass + return False + + +StoreLike = Store | StorePath | Path | str | dict[str, Buffer] + + +async def make_store_path( + store_like: StoreLike | None, + *, + mode: AccessModeLiteral | None = None, + storage_options: dict[str, Any] | None = None, +) -> StorePath: + """ + Convert a `StoreLike` object into a StorePath object. + + This function takes a `StoreLike` object and returns a `StorePath` object. The + `StoreLike` object can be a `Store`, `StorePath`, `Path`, `str`, or `dict[str, Buffer]`. + If the `StoreLike` object is a Store or `StorePath`, it is converted to a + `StorePath` object. If the `StoreLike` object is a Path or str, it is converted + to a LocalStore object and then to a `StorePath` object. If the `StoreLike` + object is a dict[str, Buffer], it is converted to a `MemoryStore` object and + then to a `StorePath` object. + + If the `StoreLike` object is None, a `MemoryStore` object is created and + converted to a `StorePath` object. + + If the `StoreLike` object is a str and starts with a protocol, it is + converted to a RemoteStore object and then to a `StorePath` object. + + If the `StoreLike` object is a dict[str, Buffer] and the mode is not None, + the `MemoryStore` object is created with the given mode. 
+ + If the `StoreLike` object is a str and starts with a protocol, the + RemoteStore object is created with the given mode and storage options. + + Parameters + ---------- + store_like : StoreLike | None + The object to convert to a `StorePath` object. + mode : AccessModeLiteral | None, optional + The mode to use when creating the `StorePath` object. If None, the + default mode is 'r'. + storage_options : dict[str, Any] | None, optional + The storage options to use when creating the `RemoteStore` object. If + None, the default storage options are used. + + Returns + ------- + StorePath + The converted StorePath object. + + Raises + ------ + TypeError + If the StoreLike object is not one of the supported types. + """ + from zarr.storage.remote import RemoteStore # circular import + + used_storage_options = False + + if isinstance(store_like, StorePath): + if mode is not None and mode != store_like.store.mode.str: + _store = store_like.store.with_mode(mode) + await _store._ensure_open() + store_like = StorePath(_store) + result = store_like + elif isinstance(store_like, Store): + if mode is not None and mode != store_like.mode.str: + store_like = store_like.with_mode(mode) + await store_like._ensure_open() + result = StorePath(store_like) + elif store_like is None: + # mode = "w" is an exception to the default mode = 'r' + result = StorePath(await MemoryStore.open(mode=mode or "w")) + elif isinstance(store_like, Path): + result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r")) + elif isinstance(store_like, str): + storage_options = storage_options or {} + + if _is_fsspec_uri(store_like): + used_storage_options = True + result = StorePath( + RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r") + ) + else: + result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r")) + elif isinstance(store_like, dict): + # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. + # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. + result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r")) + else: + msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] + raise TypeError(msg) + + if storage_options and not used_storage_options: + msg = "'storage_options' was provided but unused. 'storage_options' is only used for fsspec filesystem stores." + raise TypeError(msg) + + return result + + +def _is_fsspec_uri(uri: str) -> bool: + """ + Check if a URI looks like a non-local fsspec URI. + + Examples + -------- + >>> _is_fsspec_uri("s3://bucket") + True + >>> _is_fsspec_uri("my-directory") + False + >>> _is_fsspec_uri("local://my-directory") + False + """ + return "://" in uri or ("::" in uri and "local://" not in uri) + + +async def ensure_no_existing_node(store_path: StorePath, zarr_format: ZarrFormat) -> None: + """ + Check if a store_path is safe for array / group creation. + Returns `None` or raises an exception. + + Parameters + ---------- + store_path: StorePath + The storage location to check. + zarr_format: ZarrFormat + The Zarr format to check. 
+ + Raises + ------ + ContainsArrayError, ContainsGroupError, ContainsArrayAndGroupError + """ + if zarr_format == 2: + extant_node = await _contains_node_v2(store_path) + elif zarr_format == 3: + extant_node = await _contains_node_v3(store_path) + + if extant_node == "array": + raise ContainsArrayError(store_path.store, store_path.path) + elif extant_node == "group": + raise ContainsGroupError(store_path.store, store_path.path) + elif extant_node == "nothing": + return + msg = f"Invalid value for extant_node: {extant_node}" # type: ignore[unreachable] + raise ValueError(msg) + + +async def _contains_node_v3(store_path: StorePath) -> Literal["array", "group", "nothing"]: + """ + Check if a store_path contains nothing, an array, or a group. This function + returns the string "array", "group", or "nothing" to denote containing an array, a group, or + nothing. + + Parameters + ---------- + store_path: StorePath + The location in storage to check. + + Returns + ------- + Literal["array", "group", "nothing"] + A string representing the zarr node found at store_path. + """ + result: Literal["array", "group", "nothing"] = "nothing" + extant_meta_bytes = await (store_path / ZARR_JSON).get() + # if no metadata document could be loaded, then we just return "nothing" + if extant_meta_bytes is not None: + try: + extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + # avoid constructing a full metadata document here in the name of speed. + if extant_meta_json["node_type"] == "array": + result = "array" + elif extant_meta_json["node_type"] == "group": + result = "group" + except (KeyError, json.JSONDecodeError): + # either of these errors is consistent with no array or group present. + pass + return result + + +async def _contains_node_v2(store_path: StorePath) -> Literal["array", "group", "nothing"]: + """ + Check if a store_path contains nothing, an array, a group, or both. If both an array and a + group are detected, a `ContainsArrayAndGroup` exception is raised. Otherwise, this function + returns the string "array", "group", or "nothing" to denote containing an array, a group, or + nothing. + + Parameters + ---------- + store_path: StorePath + The location in storage to check. + + Returns + ------- + Literal["array", "group", "nothing"] + A string representing the zarr node found at store_path. + """ + _array = await contains_array(store_path=store_path, zarr_format=2) + _group = await contains_group(store_path=store_path, zarr_format=2) + + if _array and _group: + raise ContainsArrayAndGroupError(store_path.store, store_path.path) + elif _array: + return "array" + elif _group: + return "group" + else: + return "nothing" + + +async def contains_array(store_path: StorePath, zarr_format: ZarrFormat) -> bool: + """ + Check if an array exists at a given StorePath. + + Parameters + ---------- + store_path: StorePath + The StorePath to check for an existing group. + zarr_format: + The zarr format to check for. + + Returns + ------- + bool + True if the StorePath contains a group, False otherwise. + + """ + if zarr_format == 3: + extant_meta_bytes = await (store_path / ZARR_JSON).get() + if extant_meta_bytes is None: + return False + else: + try: + extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + # we avoid constructing a full metadata document here in the name of speed. 
+ if extant_meta_json["node_type"] == "array": + return True + except (ValueError, KeyError): + return False + elif zarr_format == 2: + return await (store_path / ZARRAY_JSON).exists() + msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" + raise ValueError(msg) + + +async def contains_group(store_path: StorePath, zarr_format: ZarrFormat) -> bool: + """ + Check if a group exists at a given StorePath. + + Parameters + ---------- + + store_path: StorePath + The StorePath to check for an existing group. + zarr_format: + The zarr format to check for. + + Returns + ------- + + bool + True if the StorePath contains a group, False otherwise + + """ + if zarr_format == 3: + extant_meta_bytes = await (store_path / ZARR_JSON).get() + if extant_meta_bytes is None: + return False + else: + try: + extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + # we avoid constructing a full metadata document here in the name of speed. + result: bool = extant_meta_json["node_type"] == "group" + except (ValueError, KeyError): + return False + else: + return result + elif zarr_format == 2: + return await (store_path / ZGROUP_JSON).exists() + msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" # type: ignore[unreachable] + raise ValueError(msg) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py new file mode 100644 index 0000000000..b80b04e1d0 --- /dev/null +++ b/src/zarr/storage/local.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import asyncio +import io +import os +import shutil +from pathlib import Path +from typing import TYPE_CHECKING, Self + +from zarr.abc.store import ByteRangeRequest, Store +from zarr.core.buffer import Buffer +from zarr.core.common import concurrent_map + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable + + from zarr.core.buffer import BufferPrototype + from zarr.core.common import AccessModeLiteral + + +def _get( + path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None +) -> Buffer: + if byte_range is not None: + if byte_range[0] is None: + start = 0 + else: + start = byte_range[0] + + end = (start + byte_range[1]) if byte_range[1] is not None else None + else: + return prototype.buffer.from_bytes(path.read_bytes()) + with path.open("rb") as f: + size = f.seek(0, io.SEEK_END) + if start is not None: + if start >= 0: + f.seek(start) + else: + f.seek(max(0, size + start)) + if end is not None: + if end < 0: + end = size + end + return prototype.buffer.from_bytes(f.read(end - f.tell())) + return prototype.buffer.from_bytes(f.read()) + + +def _put( + path: Path, + value: Buffer, + start: int | None = None, + exclusive: bool = False, +) -> int | None: + path.parent.mkdir(parents=True, exist_ok=True) + if start is not None: + with path.open("r+b") as f: + f.seek(start) + f.write(value.as_numpy_array().tobytes()) + return None + else: + view = memoryview(value.as_numpy_array().tobytes()) + if exclusive: + mode = "xb" + else: + mode = "wb" + with path.open(mode=mode) as f: + return f.write(view) + + +class LocalStore(Store): + """ + Local file system store. + + Parameters + ---------- + root : str or Path + Directory to use as root of store. + mode : str + Mode in which to open the store. Either 'r', 'r+', 'a', 'w', 'w-'. 
+ + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + root + """ + + supports_writes: bool = True + supports_deletes: bool = True + supports_partial_writes: bool = True + supports_listing: bool = True + + root: Path + + def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: + super().__init__(mode=mode) + if isinstance(root, str): + root = Path(root) + assert isinstance(root, Path) + self.root = root + + async def _open(self) -> None: + if not self.mode.readonly: + self.root.mkdir(parents=True, exist_ok=True) + return await super()._open() + + async def clear(self) -> None: + # docstring inherited + self._check_writable() + shutil.rmtree(self.root) + self.root.mkdir() + + async def empty(self) -> bool: + # docstring inherited + try: + with os.scandir(self.root) as it: + for entry in it: + if entry.is_file(): + # stop once a file is found + return False + except FileNotFoundError: + return True + else: + return True + + def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited + return type(self)(root=self.root, mode=mode) + + def __str__(self) -> str: + return f"file://{self.root}" + + def __repr__(self) -> str: + return f"LocalStore({str(self)!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, type(self)) and self.root == other.root + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + # docstring inherited + if not self._is_open: + await self._open() + assert isinstance(key, str) + path = self.root / key + + try: + return await asyncio.to_thread(_get, path, prototype, byte_range) + except (FileNotFoundError, IsADirectoryError, NotADirectoryError): + return None + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + # docstring inherited + args = [] + for key, byte_range in key_ranges: + assert isinstance(key, str) + path = self.root / key + args.append((_get, path, prototype, byte_range)) + return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit + + async def set(self, key: str, value: Buffer) -> None: + # docstring inherited + return await self._set(key, value) + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited + try: + return await self._set(key, value, exclusive=True) + except FileExistsError: + pass + + async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None: + if not self._is_open: + await self._open() + self._check_writable() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError("LocalStore.set(): `value` must a Buffer instance") + path = self.root / key + await asyncio.to_thread(_put, path, value, start=None, exclusive=exclusive) + + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] + ) -> None: + # docstring inherited + self._check_writable() + args = [] + for key, start, value in key_start_values: + assert isinstance(key, str) + path = self.root / key + args.append((_put, path, value, start)) + await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit + + async def delete(self, key: str) -> None: + # docstring inherited + self._check_writable() + path = self.root / key + if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? 
+ shutil.rmtree(path) + else: + await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing + + async def exists(self, key: str) -> bool: + # docstring inherited + path = self.root / key + return await asyncio.to_thread(path.is_file) + + async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + to_strip = str(self.root) + "/" + for p in list(self.root.rglob("*")): + if p.is_file(): + yield str(p).replace(to_strip, "") + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + to_strip = os.path.join(str(self.root / prefix)) + for p in (self.root / prefix).rglob("*"): + if p.is_file(): + yield str(p.relative_to(to_strip)) + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + base = self.root / prefix + to_strip = str(base) + "/" + + try: + key_iter = base.iterdir() + for key in key_iter: + yield str(key).replace(to_strip, "") + except (FileNotFoundError, NotADirectoryError): + pass diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py new file mode 100644 index 0000000000..59a796dc18 --- /dev/null +++ b/src/zarr/storage/logging.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +import inspect +import logging +import time +from collections import defaultdict +from contextlib import contextmanager +from typing import TYPE_CHECKING, Any, Self + +from zarr.abc.store import AccessMode, ByteRangeRequest, Store + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Generator, Iterable + + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import AccessModeLiteral + + +class LoggingStore(Store): + """ + Store wrapper that logs all calls to the wrapped store. + + Parameters + ---------- + store: Store + Store to wrap + log_level: str + Log level + log_handler: logging.Handler + Log handler + + Attributes + ---------- + counter: dict + Counter of number of times each method has been called + """ + + _store: Store + counter: defaultdict[str, int] + + def __init__( + self, + store: Store, + log_level: str = "DEBUG", + log_handler: logging.Handler | None = None, + ) -> None: + self._store = store + self.counter = defaultdict(int) + self.log_level = log_level + self.log_handler = log_handler + + self._configure_logger(log_level, log_handler) + + def _configure_logger( + self, log_level: str = "DEBUG", log_handler: logging.Handler | None = None + ) -> None: + self.log_level = log_level + self.logger = logging.getLogger(f"LoggingStore({self._store!s})") + self.logger.setLevel(log_level) + + if not self.logger.hasHandlers(): + if not log_handler: + log_handler = self._default_handler() + # Add handler to logger + self.logger.addHandler(log_handler) + + def _default_handler(self) -> logging.Handler: + """Define a default log handler""" + handler = logging.StreamHandler() + handler.setLevel(self.log_level) + handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + return handler + + @contextmanager + def log(self, hint: Any = "") -> Generator[None, None, None]: + """Context manager to log method calls + + Each call to the wrapped store is logged to the configured logger and added to + the counter dict. 
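A short sketch of wrapping a store with `LoggingStore`, run from async code; `buf` is an assumed Buffer instance and the log lines are abbreviated:

    from zarr.storage.logging import LoggingStore
    from zarr.storage.memory import MemoryStore

    store = LoggingStore(MemoryStore(mode="w"), log_level="INFO")
    await store.set("zarr.json", buf)
    # ... - LoggingStore(memory://...) - INFO - Calling MemoryStore.set(zarr.json)
    # ... - LoggingStore(memory://...) - INFO - Finished MemoryStore.set [0.00s]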
+ """ + method = inspect.stack()[2].function + op = f"{type(self._store).__name__}.{method}" + if hint: + op += f"({hint})" + self.logger.info(f"Calling {op}") + start_time = time.time() + try: + self.counter[method] += 1 + yield + finally: + end_time = time.time() + self.logger.info(f"Finished {op} [{end_time - start_time:.2f}s]") + + @property + def supports_writes(self) -> bool: + with self.log(): + return self._store.supports_writes + + @property + def supports_deletes(self) -> bool: + with self.log(): + return self._store.supports_deletes + + @property + def supports_partial_writes(self) -> bool: + with self.log(): + return self._store.supports_partial_writes + + @property + def supports_listing(self) -> bool: + with self.log(): + return self._store.supports_listing + + @property + def _mode(self) -> AccessMode: # type: ignore[override] + with self.log(): + return self._store._mode + + @property + def _is_open(self) -> bool: + with self.log(): + return self._store._is_open + + @_is_open.setter + def _is_open(self, value: bool) -> None: + with self.log(value): + self._store._is_open = value + + async def _open(self) -> None: + with self.log(): + return await self._store._open() + + async def _ensure_open(self) -> None: + with self.log(): + return await self._store._ensure_open() + + async def empty(self) -> bool: + # docstring inherited + with self.log(): + return await self._store.empty() + + async def clear(self) -> None: + # docstring inherited + with self.log(): + return await self._store.clear() + + def __str__(self) -> str: + return f"logging-{self._store!s}" + + def __repr__(self) -> str: + return f"LoggingStore({repr(self._store)!r})" + + def __eq__(self, other: object) -> bool: + with self.log(other): + return self._store == other + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + # docstring inherited + with self.log(key): + return await self._store.get(key=key, prototype=prototype, byte_range=byte_range) + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + # docstring inherited + keys = ",".join([k[0] for k in key_ranges]) + with self.log(keys): + return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges) + + async def exists(self, key: str) -> bool: + # docstring inherited + with self.log(key): + return await self._store.exists(key) + + async def set(self, key: str, value: Buffer) -> None: + # docstring inherited + with self.log(key): + return await self._store.set(key=key, value=value) + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited + with self.log(key): + return await self._store.set_if_not_exists(key=key, value=value) + + async def delete(self, key: str) -> None: + # docstring inherited + with self.log(key): + return await self._store.delete(key=key) + + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] + ) -> None: + # docstring inherited + keys = ",".join([k[0] for k in key_start_values]) + with self.log(keys): + return await self._store.set_partial_values(key_start_values=key_start_values) + + async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + with self.log(): + async for key in self._store.list(): + yield key + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring 
inherited + with self.log(prefix): + async for key in self._store.list_prefix(prefix=prefix): + yield key + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + with self.log(prefix): + async for key in self._store.list_dir(prefix=prefix): + yield key + + def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited + with self.log(mode): + return type(self)( + self._store.with_mode(mode), + log_level=self.log_level, + log_handler=self.log_handler, + ) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py new file mode 100644 index 0000000000..673c2a75d5 --- /dev/null +++ b/src/zarr/storage/memory.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Self + +from zarr.abc.store import ByteRangeRequest, Store +from zarr.core.buffer import Buffer, gpu +from zarr.core.common import concurrent_map +from zarr.storage._utils import _normalize_interval_index + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable, MutableMapping + + from zarr.core.buffer import BufferPrototype + from zarr.core.common import AccessModeLiteral + + +class MemoryStore(Store): + """ + In-memory store for testing purposes. + + Parameters + ---------- + store_dict : dict + Initial data + mode : str + Access mode + + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + + supports_writes: bool = True + supports_deletes: bool = True + supports_partial_writes: bool = True + supports_listing: bool = True + + _store_dict: MutableMapping[str, Buffer] + + def __init__( + self, + store_dict: MutableMapping[str, Buffer] | None = None, + *, + mode: AccessModeLiteral = "r", + ) -> None: + super().__init__(mode=mode) + if store_dict is None: + store_dict = {} + self._store_dict = store_dict + + async def empty(self) -> bool: + # docstring inherited + return not self._store_dict + + async def clear(self) -> None: + # docstring inherited + self._store_dict.clear() + + def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited + return type(self)(store_dict=self._store_dict, mode=mode) + + def __str__(self) -> str: + return f"memory://{id(self._store_dict)}" + + def __repr__(self) -> str: + return f"MemoryStore({str(self)!r})" + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, type(self)) + and self._store_dict == other._store_dict + and self.mode == other.mode + ) + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + # docstring inherited + if not self._is_open: + await self._open() + assert isinstance(key, str) + try: + value = self._store_dict[key] + start, length = _normalize_interval_index(value, byte_range) + return prototype.buffer.from_buffer(value[start : start + length]) + except KeyError: + return None + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + # docstring inherited + + # All the key-ranges arguments goes with the same prototype + async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: + return await self.get(key, prototype=prototype, byte_range=byte_range) + + return await concurrent_map(key_ranges, _get, limit=None) + + async def exists(self, key: str) -> bool: + # docstring inherited + return key in self._store_dict + + async def set(self, key: str, value: Buffer, 
byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited + self._check_writable() + await self._ensure_open() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError(f"Expected Buffer. Got {type(value)}.") + + if byte_range is not None: + buf = self._store_dict[key] + buf[byte_range[0] : byte_range[1]] = value + self._store_dict[key] = buf + else: + self._store_dict[key] = value + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited + self._check_writable() + await self._ensure_open() + self._store_dict.setdefault(key, value) + + async def delete(self, key: str) -> None: + # docstring inherited + self._check_writable() + try: + del self._store_dict[key] + except KeyError: + pass + + async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: + # docstring inherited + raise NotImplementedError + + async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + for key in self._store_dict: + yield key + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + for key in self._store_dict: + if key.startswith(prefix): + yield key.removeprefix(prefix) + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + if prefix.endswith("/"): + prefix = prefix[:-1] + + if prefix == "": + keys_unique = {k.split("/")[0] for k in self._store_dict} + else: + # Our dictionary doesn't contain directory markers, but we want to include + # a pseudo directory when there's a nested item and we're listing an + # intermediate level. + keys_unique = { + key.removeprefix(prefix + "/").split("/")[0] + for key in self._store_dict + if key.startswith(prefix + "/") and key != prefix + } + + for key in keys_unique: + yield key + + +class GpuMemoryStore(MemoryStore): + """A GPU only memory store that stores every chunk in GPU memory irrespective + of the original location. + + The dictionary of buffers to initialize this memory store with *must* be + GPU Buffers. + + Writing data to this store through ``.set`` will move the buffer to the GPU + if necessary. + + Parameters + ---------- + store_dict: MutableMapping, optional + A mutable mapping with string keys and :class:`zarr.core.buffer.gpu.Buffer` + values. + """ + + _store_dict: MutableMapping[str, gpu.Buffer] # type: ignore[assignment] + + def __init__( + self, + store_dict: MutableMapping[str, gpu.Buffer] | None = None, + *, + mode: AccessModeLiteral = "r", + ) -> None: + super().__init__(store_dict=store_dict, mode=mode) # type: ignore[arg-type] + + def __str__(self) -> str: + return f"gpumemory://{id(self._store_dict)}" + + def __repr__(self) -> str: + return f"GpuMemoryStore({str(self)!r})" + + @classmethod + def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: + """ + Create a GpuMemoryStore from a dictionary of buffers at any location. + + The dictionary backing the newly created ``GpuMemoryStore`` will not be + the same as ``store_dict``. + + Parameters + ---------- + store_dict: mapping + A mapping of strings keys to arbitrary Buffers. The buffer data + will be moved into a :class:`gpu.Buffer`. 
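A brief usage sketch may help here: ``from_dict`` accepts ordinary CPU buffers and copies each one onto the device. The snippet below is illustrative only; it assumes a CUDA-capable environment with ``cupy`` installed, and the key name is made up.

    from zarr.core.buffer import cpu
    from zarr.storage.memory import GpuMemoryStore

    # "a/0" is an illustrative key; each CPU buffer is copied into a gpu.Buffer
    cpu_buffers = {"a/0": cpu.Buffer.from_bytes(b"\x00\x01\x02\x03")}
    store = GpuMemoryStore.from_dict(cpu_buffers)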
+ + Returns + ------- + GpuMemoryStore + """ + gpu_store_dict = {k: gpu.Buffer.from_buffer(v) for k, v in store_dict.items()} + return cls(gpu_store_dict) + + async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited + self._check_writable() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError(f"Expected Buffer. Got {type(value)}.") + + # Convert to gpu.Buffer + gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value) + await super().set(key, gpu_value, byte_range=byte_range) diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py new file mode 100644 index 0000000000..6945f129ed --- /dev/null +++ b/src/zarr/storage/remote.py @@ -0,0 +1,304 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Self + +import fsspec + +from zarr.abc.store import ByteRangeRequest, Store +from zarr.core.buffer import Buffer +from zarr.storage.common import _dereference_path + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable + + from fsspec.asyn import AsyncFileSystem + + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import AccessModeLiteral, BytesLike + + +ALLOWED_EXCEPTIONS: tuple[type[Exception], ...] = ( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, +) + + +class RemoteStore(Store): + """ + A remote Store based on FSSpec + + Parameters + ---------- + fs : AsyncFileSystem + The Async FSSpec filesystem to use with this store. + mode : AccessModeLiteral + The access mode to use. + path : str + The root path of the store. + allowed_exceptions : tuple[type[Exception], ...] + When fetching data, these cases will be deemed to correspond to missing keys. + + Attributes + ---------- + fs + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + + # based on FSSpec + supports_writes: bool = True + supports_deletes: bool = True + supports_partial_writes: bool = False + supports_listing: bool = True + + fs: AsyncFileSystem + allowed_exceptions: tuple[type[Exception], ...] + + def __init__( + self, + fs: AsyncFileSystem, + mode: AccessModeLiteral = "r", + path: str = "/", + allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, + ) -> None: + super().__init__(mode=mode) + self.fs = fs + self.path = path + self.allowed_exceptions = allowed_exceptions + + if not self.fs.async_impl: + raise TypeError("Filesystem needs to support async operations.") + + @classmethod + def from_upath( + cls, + upath: Any, + mode: AccessModeLiteral = "r", + allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, + ) -> RemoteStore: + """ + Create a RemoteStore from an upath object. + + Parameters + ---------- + upath : UPath + The upath to the root of the store. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. + + Returns + ------- + RemoteStore + """ + return cls( + fs=upath.fs, + path=upath.path.rstrip("/"), + mode=mode, + allowed_exceptions=allowed_exceptions, + ) + + @classmethod + def from_url( + cls, + url: str, + storage_options: dict[str, Any] | None = None, + mode: AccessModeLiteral = "r", + allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, + ) -> RemoteStore: + """ + Create a RemoteStore from a URL. 
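As a sketch of the intended call pattern, one might open a read-only store over S3. The bucket URL below is a hypothetical placeholder, and ``s3fs`` must be installed for the ``s3://`` protocol to resolve to an async filesystem.

    from zarr.storage.remote import RemoteStore

    # "s3://example-bucket/data.zarr" is a hypothetical location, shown only for illustration
    store = RemoteStore.from_url(
        "s3://example-bucket/data.zarr",
        storage_options={"anon": True},
        mode="r",
    )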
+ + Parameters + ---------- + url : str + The URL to the root of the store. + storage_options : dict, optional + The options to pass to fsspec when creating the filesystem. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. + + Returns + ------- + RemoteStore + """ + fs, path = fsspec.url_to_fs(url, **storage_options) + return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) + + async def clear(self) -> None: + # docstring inherited + try: + for subpath in await self.fs._find(self.path, withdirs=True): + if subpath != self.path: + await self.fs._rm(subpath, recursive=True) + except FileNotFoundError: + pass + + async def empty(self) -> bool: + # docstring inherited + + # TODO: it would be nice if we didn't have to list all keys here + # it should be possible to stop after the first key is discovered + try: + return not await self.fs._ls(self.path) + except FileNotFoundError: + return True + + def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited + return type(self)( + fs=self.fs, + mode=mode, + path=self.path, + allowed_exceptions=self.allowed_exceptions, + ) + + def __repr__(self) -> str: + return f"" + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, type(self)) + and self.path == other.path + and self.mode == other.mode + and self.fs == other.fs + ) + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: ByteRangeRequest | None = None, + ) -> Buffer | None: + # docstring inherited + if not self._is_open: + await self._open() + path = _dereference_path(self.path, key) + + try: + if byte_range: + # fsspec uses start/end, not start/length + start, length = byte_range + if start is not None and length is not None: + end = start + length + elif length is not None: + end = length + else: + end = None + value = prototype.buffer.from_bytes( + await ( + self.fs._cat_file(path, start=byte_range[0], end=end) + if byte_range + else self.fs._cat_file(path) + ) + ) + + except self.allowed_exceptions: + return None + except OSError as e: + if "not satisfiable" in str(e): + # this is an s3-specific condition we probably don't want to leak + return prototype.buffer.from_bytes(b"") + raise + else: + return value + + async def set( + self, + key: str, + value: Buffer, + byte_range: tuple[int, int] | None = None, + ) -> None: + # docstring inherited + if not self._is_open: + await self._open() + self._check_writable() + path = _dereference_path(self.path, key) + # write data + if byte_range: + raise NotImplementedError + await self.fs._pipe_file(path, value.to_bytes()) + + async def delete(self, key: str) -> None: + # docstring inherited + self._check_writable() + path = _dereference_path(self.path, key) + try: + await self.fs._rm(path) + except FileNotFoundError: + pass + except self.allowed_exceptions: + pass + + async def exists(self, key: str) -> bool: + # docstring inherited + path = _dereference_path(self.path, key) + exists: bool = await self.fs._exists(path) + return exists + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + # docstring inherited + if key_ranges: + paths, starts, stops = zip( + *( + ( + _dereference_path(self.path, k[0]), + k[1][0], + ((k[1][0] or 0) + k[1][1]) if k[1][1] is not None else None, + ) + for k in key_ranges + ), 
+ strict=False, + ) + else: + return [] + # TODO: expectations for exceptions or missing keys? + res = await self.fs._cat_ranges(list(paths), starts, stops, on_error="return") + # the following is an s3-specific condition we probably don't want to leak + res = [b"" if (isinstance(r, OSError) and "not satisfiable" in str(r)) else r for r in res] + for r in res: + if isinstance(r, Exception) and not isinstance(r, self.allowed_exceptions): + raise r + + return [None if isinstance(r, Exception) else prototype.buffer.from_bytes(r) for r in res] + + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: + # docstring inherited + raise NotImplementedError + + async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + allfiles = await self.fs._find(self.path, detail=False, withdirs=False) + for onefile in (a.replace(self.path + "/", "") for a in allfiles): + yield onefile + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + prefix = f"{self.path}/{prefix.rstrip('/')}" + try: + allfiles = await self.fs._ls(prefix, detail=False) + except FileNotFoundError: + return + for onefile in (a.replace(prefix + "/", "") for a in allfiles): + yield onefile.removeprefix(self.path).removeprefix("/") + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + find_str = f"{self.path}/{prefix}" + for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): + yield onefile.removeprefix(find_str) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py new file mode 100644 index 0000000000..c9cb579586 --- /dev/null +++ b/src/zarr/storage/zip.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import os +import threading +import time +import zipfile +from pathlib import Path +from typing import TYPE_CHECKING, Any, Literal, Self + +from zarr.abc.store import ByteRangeRequest, Store +from zarr.core.buffer import Buffer, BufferPrototype + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator, Iterable + +ZipStoreAccessModeLiteral = Literal["r", "w", "a"] + + +class ZipStore(Store): + """ + Storage class using a ZIP file. + + Parameters + ---------- + path : str + Location of file. + mode : str, optional + One of 'r' to read an existing file, 'w' to truncate and write a new + file, 'a' to append to an existing file, or 'x' to exclusively create + and write a new file. + compression : int, optional + Compression method to use when writing to the archive. + allowZip64 : bool, optional + If True (the default) will create ZIP files that use the ZIP64 + extensions when the zipfile is larger than 2 GiB. If False + will raise an exception when the ZIP file would require ZIP64 + extensions. + + Attributes + ---------- + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + path + compression + allowZip64 + """ + + supports_writes: bool = True + supports_deletes: bool = False + supports_partial_writes: bool = False + supports_listing: bool = True + + path: Path + compression: int + allowZip64: bool + + _zf: zipfile.ZipFile + _lock: threading.RLock + + def __init__( + self, + path: Path | str, + *, + mode: ZipStoreAccessModeLiteral = "r", + compression: int = zipfile.ZIP_STORED, + allowZip64: bool = True, + ) -> None: + super().__init__(mode=mode) + + if isinstance(path, str): + path = Path(path) + assert isinstance(path, Path) + self.path = path # root? 
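For orientation, here is a usage sketch of this store; the archive name is illustrative, and the group and array calls mirror the API used elsewhere in this changeset rather than anything specific to ZipStore.

    import numpy as np
    from zarr.core.group import Group
    from zarr.storage.zip import ZipStore

    store = ZipStore("example.zip", mode="w")        # "example.zip" is an illustrative path
    root = Group.from_store(store, zarr_format=3)
    arr = root.create_array("x", shape=(4,), chunks=(2,), dtype="int32")
    arr[:] = np.arange(4, dtype="int32")             # chunk data is written into the archive
    store.close()                                    # close the underlying zip file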
+ + self._zmode = mode + self.compression = compression + self.allowZip64 = allowZip64 + + def _sync_open(self) -> None: + if self._is_open: + raise ValueError("store is already open") + + self._lock = threading.RLock() + + self._zf = zipfile.ZipFile( + self.path, + mode=self._zmode, + compression=self.compression, + allowZip64=self.allowZip64, + ) + + self._is_open = True + + async def _open(self) -> None: + self._sync_open() + + def __getstate__(self) -> tuple[Path, ZipStoreAccessModeLiteral, int, bool]: + return self.path, self._zmode, self.compression, self.allowZip64 + + def __setstate__(self, state: Any) -> None: + self.path, self._zmode, self.compression, self.allowZip64 = state + self._is_open = False + self._sync_open() + + def close(self) -> None: + # docstring inherited + super().close() + with self._lock: + self._zf.close() + + async def clear(self) -> None: + # docstring inherited + with self._lock: + self._check_writable() + self._zf.close() + os.remove(self.path) + self._zf = zipfile.ZipFile( + self.path, mode="w", compression=self.compression, allowZip64=self.allowZip64 + ) + + async def empty(self) -> bool: + # docstring inherited + with self._lock: + return not self._zf.namelist() + + def with_mode(self, mode: ZipStoreAccessModeLiteral) -> Self: # type: ignore[override] + # docstring inherited + raise NotImplementedError("ZipStore cannot be reopened with a new mode.") + + def __str__(self) -> str: + return f"zip://{self.path}" + + def __repr__(self) -> str: + return f"ZipStore({str(self)!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, type(self)) and self.path == other.path + + def _get( + self, + key: str, + prototype: BufferPrototype, + byte_range: ByteRangeRequest | None = None, + ) -> Buffer | None: + # docstring inherited + try: + with self._zf.open(key) as f: # will raise KeyError + if byte_range is None: + return prototype.buffer.from_bytes(f.read()) + start, length = byte_range + if start: + if start < 0: + start = f.seek(start, os.SEEK_END) + start + else: + start = f.seek(start, os.SEEK_SET) + if length: + return prototype.buffer.from_bytes(f.read(length)) + else: + return prototype.buffer.from_bytes(f.read()) + except KeyError: + return None + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: ByteRangeRequest | None = None, + ) -> Buffer | None: + # docstring inherited + assert isinstance(key, str) + + with self._lock: + return self._get(key, prototype=prototype, byte_range=byte_range) + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRangeRequest]], + ) -> list[Buffer | None]: + # docstring inherited + out = [] + with self._lock: + for key, byte_range in key_ranges: + out.append(self._get(key, prototype=prototype, byte_range=byte_range)) + return out + + def _set(self, key: str, value: Buffer) -> None: + # generally, this should be called inside a lock + keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) + keyinfo.compress_type = self.compression + if keyinfo.filename[-1] == os.sep: + keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x + keyinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- + self._zf.writestr(keyinfo, value.to_bytes()) + + async def set(self, key: str, value: Buffer) -> None: + # docstring inherited + self._check_writable() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError("ZipStore.set(): `value` must 
a Buffer instance") + with self._lock: + self._set(key, value) + + async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: + raise NotImplementedError + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + self._check_writable() + with self._lock: + members = self._zf.namelist() + if key not in members: + self._set(key, value) + + async def delete(self, key: str) -> None: + # docstring inherited + raise NotImplementedError + + async def exists(self, key: str) -> bool: + # docstring inherited + with self._lock: + try: + self._zf.getinfo(key) + except KeyError: + return False + else: + return True + + async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + with self._lock: + for key in self._zf.namelist(): + yield key + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + async for key in self.list(): + if key.startswith(prefix): + yield key.removeprefix(prefix) + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + if prefix.endswith("/"): + prefix = prefix[:-1] + + keys = self._zf.namelist() + seen = set() + if prefix == "": + keys_unique = {k.split("/")[0] for k in keys} + for key in keys_unique: + if key not in seen: + seen.add(key) + yield key + else: + for key in keys: + if key.startswith(prefix + "/") and key != prefix: + k = key.removeprefix(prefix + "/").split("/")[0] + if k not in seen: + seen.add(k) + yield k diff --git a/src/zarr/testing/__init__.py b/src/zarr/testing/__init__.py new file mode 100644 index 0000000000..0b4d8cf417 --- /dev/null +++ b/src/zarr/testing/__init__.py @@ -0,0 +1,13 @@ +import importlib.util +import warnings + +if importlib.util.find_spec("pytest") is not None: + from zarr.testing.store import StoreTests +else: + warnings.warn("pytest not installed, skipping test suite", stacklevel=2) + +from zarr.testing.utils import assert_bytes_equal + +# TODO: import public buffer tests? 
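The re-exported ``StoreTests`` base class is meant to be subclassed by store implementations, both inside this package and downstream. A minimal sketch of such a subclass, exercising the in-memory store; the class name and fixture values are illustrative.

    import pytest

    from zarr.core.buffer import cpu
    from zarr.storage import MemoryStore
    from zarr.testing import StoreTests

    class TestExampleMemoryStore(StoreTests[MemoryStore, cpu.Buffer]):
        store_cls = MemoryStore
        buffer_cls = cpu.Buffer

        async def set(self, store: MemoryStore, key: str, value: cpu.Buffer) -> None:
            # bypass the Store API so that the API itself is what gets tested
            store._store_dict[key] = value

        async def get(self, store: MemoryStore, key: str) -> cpu.Buffer:
            return store._store_dict[key]

        @pytest.fixture
        def store_kwargs(self) -> dict[str, object]:
            return {"store_dict": {}, "mode": "r+"}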
+ +__all__ = ["StoreTests", "assert_bytes_equal"] diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py new file mode 100644 index 0000000000..c3694e268b --- /dev/null +++ b/src/zarr/testing/buffer.py @@ -0,0 +1,77 @@ +# mypy: ignore-errors +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +import numpy as np +import numpy.typing as npt + +from zarr.core.buffer import Buffer, BufferPrototype, cpu +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import Self + + +__all__ = [ + "NDBufferUsingTestNDArrayLike", + "StoreExpectingTestBuffer", + "TestBuffer", +] + + +class TestNDArrayLike(np.ndarray): + """An example of a ndarray-like class""" + + __test__ = False + + +class TestBuffer(cpu.Buffer): + """Example of a custom Buffer that handles ArrayLike""" + + __test__ = False + + +class NDBufferUsingTestNDArrayLike(cpu.NDBuffer): + """Example of a custom NDBuffer that handles MyNDArrayLike""" + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Any | None = None, + ) -> Self: + """Overwrite `NDBuffer.create` to create an TestNDArrayLike instance""" + ret = cls(TestNDArrayLike(shape=shape, dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + +class StoreExpectingTestBuffer(MemoryStore): + """Example of a custom Store that expect MyBuffer for all its non-metadata + + We assume that keys containing "json" is metadata + """ + + async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + if "json" not in key: + assert isinstance(value, TestBuffer) + await super().set(key, value, byte_range) + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int, int | None] | None = None, + ) -> Buffer | None: + if "json" not in key: + assert prototype.buffer is TestBuffer + ret = await super().get(key=key, prototype=prototype, byte_range=byte_range) + if ret is not None: + assert isinstance(ret, prototype.buffer) + return ret diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py new file mode 100644 index 0000000000..b4da75b06b --- /dev/null +++ b/src/zarr/testing/store.py @@ -0,0 +1,340 @@ +import pickle +from typing import Any, Generic, TypeVar, cast + +import pytest + +from zarr.abc.store import AccessMode, Store +from zarr.core.buffer import Buffer, default_buffer_prototype +from zarr.core.common import AccessModeLiteral +from zarr.core.sync import _collect_aiterator +from zarr.storage._utils import _normalize_interval_index +from zarr.testing.utils import assert_bytes_equal + +__all__ = ["StoreTests"] + + +S = TypeVar("S", bound=Store) +B = TypeVar("B", bound=Buffer) + + +class StoreTests(Generic[S, B]): + store_cls: type[S] + buffer_cls: type[B] + + async def set(self, store: S, key: str, value: Buffer) -> None: + """ + Insert a value into a storage backend, with a specific key. + This should not not use any store methods. Bypassing the store methods allows them to be + tested. + """ + raise NotImplementedError + + async def get(self, store: S, key: str) -> Buffer: + """ + Retrieve a value from a storage backend, by key. + This should not not use any store methods. Bypassing the store methods allows them to be + tested. 
+ """ + + raise NotImplementedError + + @pytest.fixture + def store_kwargs(self) -> dict[str, Any]: + return {"mode": "r+"} + + @pytest.fixture + async def store(self, store_kwargs: dict[str, Any]) -> Store: + return await self.store_cls.open(**store_kwargs) + + def test_store_type(self, store: S) -> None: + assert isinstance(store, Store) + assert isinstance(store, self.store_cls) + + def test_store_eq(self, store: S, store_kwargs: dict[str, Any]) -> None: + # check self equality + assert store == store + + # check store equality with same inputs + # asserting this is important for being able to compare (de)serialized stores + store2 = self.store_cls(**store_kwargs) + assert store == store2 + + def test_serializable_store(self, store: S) -> None: + foo = pickle.dumps(store) + assert pickle.loads(foo) == store + + def test_store_mode(self, store: S, store_kwargs: dict[str, Any]) -> None: + assert store.mode == AccessMode.from_literal("r+") + assert not store.mode.readonly + + with pytest.raises(AttributeError): + store.mode = AccessMode.from_literal("w") # type: ignore[misc] + + @pytest.mark.parametrize("mode", ["r", "r+", "a", "w", "w-"]) + async def test_store_open_mode( + self, store_kwargs: dict[str, Any], mode: AccessModeLiteral + ) -> None: + store_kwargs["mode"] = mode + store = await self.store_cls.open(**store_kwargs) + assert store._is_open + assert store.mode == AccessMode.from_literal(mode) + + async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: + kwargs = {**store_kwargs, "mode": "r"} + store = await self.store_cls.open(**kwargs) + assert store.mode == AccessMode.from_literal("r") + assert store.mode.readonly + + # set + with pytest.raises(ValueError): + await store.set("foo", self.buffer_cls.from_bytes(b"bar")) + + # delete + with pytest.raises(ValueError): + await store.delete("foo") + + def test_store_repr(self, store: S) -> None: + raise NotImplementedError + + def test_store_supports_writes(self, store: S) -> None: + raise NotImplementedError + + def test_store_supports_partial_writes(self, store: S) -> None: + raise NotImplementedError + + def test_store_supports_listing(self, store: S) -> None: + raise NotImplementedError + + @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) + @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) + @pytest.mark.parametrize("byte_range", [None, (0, None), (1, None), (1, 2), (None, 1)]) + async def test_get( + self, store: S, key: str, data: bytes, byte_range: None | tuple[int | None, int | None] + ) -> None: + """ + Ensure that data can be read from the store using the store.get method. + """ + data_buf = self.buffer_cls.from_bytes(data) + await self.set(store, key, data_buf) + observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range) + start, length = _normalize_interval_index(data_buf, interval=byte_range) + expected = data_buf[start : start + length] + assert_bytes_equal(observed, expected) + + async def test_get_many(self, store: S) -> None: + """ + Ensure that multiple keys can be retrieved at once with the _get_many method. 
+ """ + keys = tuple(map(str, range(10))) + values = tuple(f"{k}".encode() for k in keys) + for k, v in zip(keys, values, strict=False): + await self.set(store, k, self.buffer_cls.from_bytes(v)) + observed_buffers = await _collect_aiterator( + store._get_many( + zip( + keys, + (default_buffer_prototype(),) * len(keys), + (None,) * len(keys), + strict=False, + ) + ) + ) + observed_kvs = sorted(((k, b.to_bytes()) for k, b in observed_buffers)) # type: ignore[union-attr] + expected_kvs = sorted(((k, b) for k, b in zip(keys, values, strict=False))) + assert observed_kvs == expected_kvs + + @pytest.mark.parametrize("key", ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"]) + @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) + async def test_set(self, store: S, key: str, data: bytes) -> None: + """ + Ensure that data can be written to the store using the store.set method. + """ + assert not store.mode.readonly + data_buf = self.buffer_cls.from_bytes(data) + await store.set(key, data_buf) + observed = await self.get(store, key) + assert_bytes_equal(observed, data_buf) + + async def test_set_many(self, store: S) -> None: + """ + Test that a dict of key : value pairs can be inserted into the store via the + `_set_many` method. + """ + keys = ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"] + data_buf = [self.buffer_cls.from_bytes(k.encode()) for k in keys] + store_dict = dict(zip(keys, data_buf, strict=True)) + await store._set_many(store_dict.items()) + for k, v in store_dict.items(): + assert (await self.get(store, k)).to_bytes() == v.to_bytes() + + @pytest.mark.parametrize( + "key_ranges", + [ + [], + [("zarr.json", (0, 1))], + [("c/0", (0, 1)), ("zarr.json", (0, None))], + [("c/0/0", (0, 1)), ("c/0/1", (None, 2)), ("c/0/2", (0, 3))], + ], + ) + async def test_get_partial_values( + self, store: S, key_ranges: list[tuple[str, tuple[int | None, int | None]]] + ) -> None: + # put all of the data + for key, _ in key_ranges: + await self.set(store, key, self.buffer_cls.from_bytes(bytes(key, encoding="utf-8"))) + + # read back just part of it + observed_maybe = await store.get_partial_values( + prototype=default_buffer_prototype(), key_ranges=key_ranges + ) + + observed: list[Buffer] = [] + expected: list[Buffer] = [] + + for obs in observed_maybe: + assert obs is not None + observed.append(obs) + + for idx in range(len(observed)): + key, byte_range = key_ranges[idx] + result = await store.get( + key, prototype=default_buffer_prototype(), byte_range=byte_range + ) + assert result is not None + expected.append(result) + + assert all( + obs.to_bytes() == exp.to_bytes() for obs, exp in zip(observed, expected, strict=True) + ) + + async def test_exists(self, store: S) -> None: + assert not await store.exists("foo") + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) + assert await store.exists("foo/zarr.json") + + async def test_delete(self, store: S) -> None: + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) + assert await store.exists("foo/zarr.json") + await store.delete("foo/zarr.json") + assert not await store.exists("foo/zarr.json") + + async def test_empty(self, store: S) -> None: + assert await store.empty() + await self.set( + store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")) + ) + assert not await store.empty() + + async def test_clear(self, store: S) -> None: + await self.set( + store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")) + ) + await store.clear() + assert await store.empty() + + async def 
test_list(self, store: S) -> None: + assert await _collect_aiterator(store.list()) == () + prefix = "foo" + data = self.buffer_cls.from_bytes(b"") + store_dict = { + prefix + "/zarr.json": data, + **{prefix + f"/c/{idx}": data for idx in range(10)}, + } + await store._set_many(store_dict.items()) + expected_sorted = sorted(store_dict.keys()) + observed = await _collect_aiterator(store.list()) + observed_sorted = sorted(observed) + assert observed_sorted == expected_sorted + + async def test_list_prefix(self, store: S) -> None: + """ + Test that the `list_prefix` method works as intended. Given a prefix, it should return + all the keys in storage that start with this prefix. Keys should be returned with the shared + prefix removed. + """ + prefixes = ("", "a/", "a/b/", "a/b/c/") + data = self.buffer_cls.from_bytes(b"") + fname = "zarr.json" + store_dict = {p + fname: data for p in prefixes} + + await store._set_many(store_dict.items()) + + for prefix in prefixes: + observed = tuple(sorted(await _collect_aiterator(store.list_prefix(prefix)))) + expected: tuple[str, ...] = () + for key in store_dict: + if key.startswith(prefix): + expected += (key.removeprefix(prefix),) + expected = tuple(sorted(expected)) + assert observed == expected + + async def test_list_dir(self, store: S) -> None: + root = "foo" + store_dict = { + root + "/zarr.json": self.buffer_cls.from_bytes(b"bar"), + root + "/c/1": self.buffer_cls.from_bytes(b"\x01"), + } + + assert await _collect_aiterator(store.list_dir("")) == () + assert await _collect_aiterator(store.list_dir(root)) == () + + await store._set_many(store_dict.items()) + + keys_observed = await _collect_aiterator(store.list_dir(root)) + keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict} + + assert sorted(keys_observed) == sorted(keys_expected) + + keys_observed = await _collect_aiterator(store.list_dir(root + "/")) + assert sorted(keys_expected) == sorted(keys_observed) + + async def test_with_mode(self, store: S) -> None: + data = b"0000" + await self.set(store, "key", self.buffer_cls.from_bytes(data)) + assert (await self.get(store, "key")).to_bytes() == data + + for mode in ["r", "a"]: + mode = cast(AccessModeLiteral, mode) + clone = store.with_mode(mode) + # await store.close() + await clone._ensure_open() + assert clone.mode == AccessMode.from_literal(mode) + assert isinstance(clone, type(store)) + + # earlier writes are visible + result = await clone.get("key", default_buffer_prototype()) + assert result is not None + assert result.to_bytes() == data + + # writes to original after with_mode is visible + await self.set(store, "key-2", self.buffer_cls.from_bytes(data)) + result = await clone.get("key-2", default_buffer_prototype()) + assert result is not None + assert result.to_bytes() == data + + if mode == "a": + # writes to clone is visible in the original + await clone.set("key-3", self.buffer_cls.from_bytes(data)) + result = await clone.get("key-3", default_buffer_prototype()) + assert result is not None + assert result.to_bytes() == data + + else: + with pytest.raises(ValueError, match="store mode"): + await clone.set("key-3", self.buffer_cls.from_bytes(data)) + + async def test_set_if_not_exists(self, store: S) -> None: + key = "k" + data_buf = self.buffer_cls.from_bytes(b"0000") + await self.set(store, key, data_buf) + + new = self.buffer_cls.from_bytes(b"1111") + await store.set_if_not_exists("k", new) # no error + + result = await store.get(key, default_buffer_prototype()) + assert result == data_buf + + await 
store.set_if_not_exists("k2", new) # no error + + result = await store.get("k2", default_buffer_prototype()) + assert result == new diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py new file mode 100644 index 0000000000..bb9fda65a1 --- /dev/null +++ b/src/zarr/testing/strategies.py @@ -0,0 +1,191 @@ +from typing import Any, Literal + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +import numpy as np +from hypothesis import given, settings # noqa: F401 +from hypothesis.strategies import SearchStrategy + +from zarr.core.array import Array +from zarr.core.group import Group +from zarr.storage import MemoryStore, StoreLike + +# Copied from Xarray +_attr_keys = st.text(st.characters(), min_size=1) +_attr_values = st.recursive( + st.none() | st.booleans() | st.text(st.characters(), max_size=5), + lambda children: st.lists(children) | st.dictionaries(_attr_keys, children), + max_leaves=3, +) + + +def v3_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + # | npst.byte_string_dtypes(endianness="=") + # | npst.unicode_string_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + +def v2_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + | npst.byte_string_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") + | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + +# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names +# 1. must not be the empty string ("") +# 2. must not include the character "/" +# 3. must not be a string composed only of period characters, e.g. "." or ".." +# 4. must not start with the reserved prefix "__" +zarr_key_chars = st.sampled_from( + ".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" +) +node_names = st.text(zarr_key_chars, min_size=1).filter( + lambda t: t not in (".", "..") and not t.startswith("__") +) +array_names = node_names +attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) +keys = st.lists(node_names, min_size=1).map("/".join) +paths = st.just("/") | keys +stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) +compressors = st.sampled_from([None, "default"]) +zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) +array_shapes = npst.array_shapes(max_dims=4) + + +@st.composite # type: ignore[misc] +def numpy_arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, + zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats, +) -> Any: + """ + Generate numpy arrays that can be saved in the provided Zarr format. + """ + zarr_format = draw(zarr_formats) + return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes)) + + +@st.composite # type: ignore[misc] +def np_array_and_chunks( + draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays +) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] + """A hypothesis strategy to generate small sized random arrays. + + Returns: a tuple of the array and a suitable random chunking for it. 
+ """ + array = draw(arrays) + # We want this strategy to shrink towards arrays with smaller number of chunks + # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks + numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape])) + # 2. and now generate the chunks tuple + chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True)) + return (array, chunks) + + +@st.composite # type: ignore[misc] +def arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, + compressors: st.SearchStrategy = compressors, + stores: st.SearchStrategy[StoreLike] = stores, + paths: st.SearchStrategy[None | str] = paths, + array_names: st.SearchStrategy = array_names, + arrays: st.SearchStrategy | None = None, + attrs: st.SearchStrategy = attrs, + zarr_formats: st.SearchStrategy = zarr_formats, +) -> Array: + store = draw(stores) + path = draw(paths) + name = draw(array_names) + attributes = draw(attrs) + zarr_format = draw(zarr_formats) + if arrays is None: + arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format)) + nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) + # test that None works too. + fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) + # compressor = draw(compressors) + + expected_attrs = {} if attributes is None else attributes + + array_path = path + ("/" if not path.endswith("/") else "") + name + root = Group.from_store(store, zarr_format=zarr_format) + + a = root.create_array( + array_path, + shape=nparray.shape, + chunks=chunks, + dtype=nparray.dtype, + attributes=attributes, + # compressor=compressor, # FIXME + fill_value=fill_value, + ) + + assert isinstance(a, Array) + if a.metadata.zarr_format == 3: + assert a.fill_value is not None + assert isinstance(root[array_path], Array) + assert nparray.shape == a.shape + assert chunks == a.chunks + assert array_path == a.path, (path, name, array_path, a.name, a.path) + assert a.basename == name, (a.basename, name) + assert dict(a.attrs) == expected_attrs + + a[:] = nparray + + return a + + +def is_negative_slice(idx: Any) -> bool: + return isinstance(idx, slice) and idx.step is not None and idx.step < 0 + + +@st.composite # type: ignore[misc] +def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs) -> Any: # type: ignore[no-untyped-def] + """Basic indices without unsupported negative slices.""" + return draw( + npst.basic_indices(shape=shape, **kwargs).filter( + lambda idxr: ( + not ( + is_negative_slice(idxr) + or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) + ) + ) + ) + ) + + +def key_ranges( + keys: SearchStrategy = node_names, max_size: int | None = None +) -> SearchStrategy[list[int]]: + """ + Function to generate key_ranges strategy for get_partial_values() + returns list strategy w/ form:: + + [(key, (range_start, range_step)), + (key, (range_start, range_step)),...] 
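To see the strategies above in context, here is a sketch of a property test driven by ``arrays()``; the test name is illustrative and ``hypothesis`` must be installed.

    from hypothesis import given

    from zarr.testing.strategies import arrays

    @given(arr=arrays())
    def test_roundtrip(arr):
        # `arrays()` has already written the generated data into the array,
        # so reading it back should give data with the same shape
        data = arr[:]
        assert data.shape == arr.shape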
+ """ + byte_ranges = st.tuples( + st.none() | st.integers(min_value=0, max_value=max_size), + st.none() | st.integers(min_value=0, max_value=max_size), + ) + key_tuple = st.tuples(keys, byte_ranges) + return st.lists(key_tuple, min_size=1, max_size=10) diff --git a/src/zarr/testing/utils.py b/src/zarr/testing/utils.py new file mode 100644 index 0000000000..9d6dfa7e18 --- /dev/null +++ b/src/zarr/testing/utils.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, cast + +import pytest + +from zarr.core.buffer import Buffer + +if TYPE_CHECKING: + from zarr.core.common import BytesLike + +__all__ = ["assert_bytes_equal"] + + +def assert_bytes_equal(b1: Buffer | BytesLike | None, b2: Buffer | BytesLike | None) -> None: + """Help function to assert if two bytes-like or Buffers are equal + + Warnings + -------- + Always copies data, only use for testing and debugging + """ + if isinstance(b1, Buffer): + b1 = b1.to_bytes() + if isinstance(b2, Buffer): + b2 = b2.to_bytes() + assert b1 == b2 + + +def has_cupy() -> bool: + try: + import cupy + + return cast(bool, cupy.cuda.runtime.getDeviceCount() > 0) + except ImportError: + return False + except cupy.cuda.runtime.CUDARuntimeError: + return False + + +# Decorator for GPU tests +def gpu_test(func: Any) -> Any: + return pytest.mark.gpu( + pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(func) + ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_strings.py b/tests/test_strings.py new file mode 100644 index 0000000000..dca0570a25 --- /dev/null +++ b/tests/test_strings.py @@ -0,0 +1,35 @@ +"""Tests for the strings module.""" + +import numpy as np +import pytest + +from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING, _STRING_DTYPE, cast_to_string_dtype + + +def test_string_defaults() -> None: + if _NUMPY_SUPPORTS_VLEN_STRING: + assert _STRING_DTYPE == np.dtypes.StringDType() + else: + assert _STRING_DTYPE == np.dtypes.ObjectDType() + + +def test_cast_to_string_dtype() -> None: + d1 = np.array(["a", "b", "c"]) + assert d1.dtype == np.dtype(" LocalStore | MemoryStore | RemoteStore | ZipStore: + if store == "local": + return await LocalStore.open(path, mode="w") + if store == "memory": + return await MemoryStore.open(mode="w") + if store == "remote": + return await RemoteStore.open(url=path, mode="w") + if store == "zip": + return await ZipStore.open(path + "/zarr.zip", mode="w") + raise AssertionError + + +@pytest.fixture(params=[str, pathlib.Path]) +def path_type(request: pytest.FixtureRequest) -> Any: + return request.param + + +# todo: harmonize this with local_store fixture +@pytest.fixture +async def store_path(tmpdir: LEGACY_PATH) -> StorePath: + store = await LocalStore.open(str(tmpdir), mode="w") + return StorePath(store) + + +@pytest.fixture +async def local_store(tmpdir: LEGACY_PATH) -> LocalStore: + return await LocalStore.open(str(tmpdir), mode="w") + + +@pytest.fixture +async def remote_store(url: str) -> RemoteStore: + return await RemoteStore.open(url, mode="w") + + +@pytest.fixture +async def memory_store() -> MemoryStore: + return await MemoryStore.open(mode="w") + + +@pytest.fixture +async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore: + return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w") + + +@pytest.fixture +async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: + param = request.param + return await parse_store(param, str(tmpdir)) + + 
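Test modules are expected to request the ``store`` fixture with indirect parametrization, since the fixture reads ``request.param``. A sketch of what such a test might look like, assuming the suite runs async tests as the rest of this changeset does; the test body itself is illustrative.

    import pytest

    from zarr.abc.store import Store
    from zarr.core.buffer import cpu, default_buffer_prototype

    @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
    async def test_store_roundtrip(store: Store) -> None:
        # the fixture opens every backend in write mode, so set/get should round-trip
        await store.set("foo", cpu.Buffer.from_bytes(b"bar"))
        result = await store.get("foo", prototype=default_buffer_prototype())
        assert result is not None
        assert result.to_bytes() == b"bar"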
+@pytest.fixture(params=["local", "memory", "zip"]) +def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store: + result = sync(parse_store(request.param, str(tmp_path))) + if not isinstance(result, Store): + raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") + return result + + +@dataclass +class AsyncGroupRequest: + zarr_format: ZarrFormat + store: Literal["local", "remote", "memory", "zip"] + attributes: dict[str, Any] = field(default_factory=dict) + + +@pytest.fixture +async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: + param: AsyncGroupRequest = request.param + + store = await parse_store(param.store, str(tmpdir)) + return await AsyncGroup.from_store( + store, + attributes=param.attributes, + zarr_format=param.zarr_format, + exists_ok=False, + ) + + +@pytest.fixture(params=["numpy", "cupy"]) +def xp(request: pytest.FixtureRequest) -> Any: + """Fixture to parametrize over numpy-like libraries""" + + if request.param == "cupy": + request.node.add_marker(pytest.mark.gpu) + + return pytest.importorskip(request.param) + + +@pytest.fixture(autouse=True) +def reset_config() -> Generator[None, None, None]: + config.reset() + yield + config.reset() + + +@dataclass +class ArrayRequest: + shape: ChunkCoords + dtype: str + order: MemoryOrder + + +@pytest.fixture +def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: + array_request: ArrayRequest = request.param + return ( + np.arange(np.prod(array_request.shape)) + .reshape(array_request.shape, order=array_request.order) + .astype(array_request.dtype) + ) + + +@pytest.fixture(params=(2, 3)) +def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: + if request.param == 2: + return 2 + elif request.param == 3: + return 3 + msg = f"Invalid zarr format requested. Got {request.param}, expected on of (2,3)." 
+ raise ValueError(msg) + + +settings.register_profile( + "ci", + max_examples=1000, + deadline=None, + suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], +) +settings.register_profile( + "local", + max_examples=300, + suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], + verbosity=Verbosity.verbose, +) diff --git a/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt b/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt new file mode 100644 index 0000000000..eee724c912 --- /dev/null +++ b/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt @@ -0,0 +1,14 @@ +[zarr.codecs] +test = package_with_entrypoint:TestEntrypointCodec +[zarr.codecs.test] +another_codec = package_with_entrypoint:TestEntrypointGroup.Codec +[zarr] +codec_pipeline = package_with_entrypoint:TestEntrypointCodecPipeline +ndbuffer = package_with_entrypoint:TestEntrypointNDBuffer +buffer = package_with_entrypoint:TestEntrypointBuffer +[zarr.buffer] +another_buffer = package_with_entrypoint:TestEntrypointGroup.Buffer +[zarr.ndbuffer] +another_ndbuffer = package_with_entrypoint:TestEntrypointGroup.NDBuffer +[zarr.codec_pipeline] +another_pipeline = package_with_entrypoint:TestEntrypointGroup.Pipeline diff --git a/tests/v3/package_with_entrypoint/__init__.py b/tests/v3/package_with_entrypoint/__init__.py new file mode 100644 index 0000000000..b818adf8ea --- /dev/null +++ b/tests/v3/package_with_entrypoint/__init__.py @@ -0,0 +1,66 @@ +from collections.abc import Iterable + +from numpy import ndarray + +import zarr.core.buffer +from zarr.abc.codec import ArrayBytesCodec, CodecInput, CodecOutput, CodecPipeline +from zarr.codecs import BytesCodec +from zarr.core.array_spec import ArraySpec +from zarr.core.buffer import Buffer, NDBuffer +from zarr.core.common import BytesLike + + +class TestEntrypointCodec(ArrayBytesCodec): + is_fixed_size = True + + async def encode( + self, + chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], + ) -> Iterable[CodecOutput | None]: + pass + + async def decode( + self, + chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], + ) -> ndarray: + pass + + def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: + return input_byte_length + + +class TestEntrypointCodecPipeline(CodecPipeline): + def __init__(self, batch_size: int = 1) -> None: + pass + + async def encode( + self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] + ) -> BytesLike: + pass + + async def decode( + self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] + ) -> ndarray: + pass + + +class TestEntrypointBuffer(Buffer): + pass + + +class TestEntrypointNDBuffer(NDBuffer): + pass + + +class TestEntrypointGroup: + class Codec(BytesCodec): + pass + + class Buffer(zarr.core.buffer.Buffer): + pass + + class NDBuffer(zarr.core.buffer.NDBuffer): + pass + + class Pipeline(CodecPipeline): + pass diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py new file mode 100644 index 0000000000..0614185f68 --- /dev/null +++ b/tests/v3/test_api.py @@ -0,0 +1,960 @@ +import pathlib +import warnings + +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +import zarr +import zarr.api.asynchronous +import zarr.core.group +from zarr import Array, Group +from zarr.abc.store import Store +from zarr.api.synchronous import create, group, load, open, open_group, save, save_array, save_group +from zarr.core.common import ZarrFormat +from zarr.errors import 
MetadataValidationError +from zarr.storage.memory import MemoryStore + + +def test_create_array(memory_store: Store) -> None: + store = memory_store + + # create array + z = create(shape=100, store=store) + assert isinstance(z, Array) + assert z.shape == (100,) + + # create array, overwrite, specify chunk shape + z = create(shape=200, chunk_shape=20, store=store, overwrite=True) + assert isinstance(z, Array) + assert z.shape == (200,) + assert z.chunks == (20,) + + # create array, overwrite, specify chunk shape via chunks param + z = create(shape=400, chunks=40, store=store, overwrite=True) + assert isinstance(z, Array) + assert z.shape == (400,) + assert z.chunks == (40,) + + +async def test_open_array(memory_store: MemoryStore) -> None: + store = memory_store + + # open array, create if doesn't exist + z = open(store=store, shape=100) + assert isinstance(z, Array) + assert z.shape == (100,) + + # open array, overwrite + # store._store_dict = {} + store = MemoryStore(mode="w") + z = open(store=store, shape=200) + assert isinstance(z, Array) + assert z.shape == (200,) + + # open array, read-only + store_cls = type(store) + ro_store = await store_cls.open(store_dict=store._store_dict, mode="r") + z = open(store=ro_store) + assert isinstance(z, Array) + assert z.shape == (200,) + assert z.read_only + + # path not found + with pytest.raises(FileNotFoundError): + open(store="doesnotexist", mode="r") + + +async def test_open_group(memory_store: MemoryStore) -> None: + store = memory_store + + # open group, create if doesn't exist + g = open_group(store=store) + g.create_group("foo") + assert isinstance(g, Group) + assert "foo" in g + + # open group, overwrite + # g = open_group(store=store) + # assert isinstance(g, Group) + # assert "foo" not in g + + # open group, read-only + store_cls = type(store) + ro_store = await store_cls.open(store_dict=store._store_dict, mode="r") + g = open_group(store=ro_store) + assert isinstance(g, Group) + # assert g.read_only + + +@pytest.mark.parametrize("zarr_format", [None, 2, 3]) +async def test_open_group_unspecified_version( + tmpdir: pathlib.Path, zarr_format: ZarrFormat +) -> None: + """regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" + + # create a group with specified zarr format (could be 2, 3, or None) + _ = await zarr.api.asynchronous.open_group( + store=str(tmpdir), mode="w", zarr_format=zarr_format, attributes={"foo": "bar"} + ) + + # now open that group without specifying the format + g2 = await zarr.api.asynchronous.open_group(store=str(tmpdir), mode="r") + + assert g2.attrs == {"foo": "bar"} + + if zarr_format is not None: + assert g2.metadata.zarr_format == zarr_format + + +def test_save_errors() -> None: + with pytest.raises(ValueError): + # no arrays provided + save_group("data/group.zarr") + with pytest.raises(TypeError): + # no array provided + save_array("data/group.zarr") + with pytest.raises(ValueError): + # no arrays provided + save("data/group.zarr") + + +def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: + # 'r' means read only (must exist) + with pytest.raises(FileNotFoundError): + zarr.open(store=tmp_path, mode="r") + z1 = zarr.ones(store=tmp_path, shape=(3, 3)) + assert z1.fill_value == 1 + z2 = zarr.open(store=tmp_path, mode="r") + assert isinstance(z2, Array) + assert z2.fill_value == 1 + assert (z2[:] == 1).all() + with pytest.raises(ValueError): + z2[:] = 3 + + +def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: + # 'r+' means read/write (must exist) + with 
pytest.raises(FileNotFoundError): + zarr.open(store=tmp_path, mode="r+") + zarr.ones(store=tmp_path, shape=(3, 3)) + z2 = zarr.open(store=tmp_path, mode="r+") + assert isinstance(z2, Array) + assert (z2[:] == 1).all() + z2[:] = 3 + + +async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: + # Open without shape argument should default to group + g = zarr.open(store=tmp_path, mode="a") + assert isinstance(g, Group) + await g.store_path.delete() + + # 'a' means read/write (create if doesn't exist) + arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) + assert isinstance(arr, Array) + arr[...] = 1 + z2 = zarr.open(store=tmp_path, mode="a") + assert isinstance(z2, Array) + assert (z2[:] == 1).all() + z2[:] = 3 + + +def test_open_with_mode_w(tmp_path: pathlib.Path) -> None: + # 'w' means create (overwrite if exists); + arr = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) + assert isinstance(arr, Array) + + arr[...] = 3 + z2 = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) + assert isinstance(z2, Array) + assert not (z2[:] == 3).all() + z2[:] = 3 + + +def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: + # 'w-' means create (fail if exists) + arr = zarr.open(store=tmp_path, mode="w-", shape=(3, 3)) + assert isinstance(arr, Array) + arr[...] = 1 + with pytest.raises(FileExistsError): + zarr.open(store=tmp_path, mode="w-") + + +# def test_lazy_loader(): +# foo = np.arange(100) +# bar = np.arange(100, 0, -1) +# store = "data/group.zarr" +# save(store, foo=foo, bar=bar) +# loader = load(store) +# assert "foo" in loader +# assert "bar" in loader +# assert "baz" not in loader +# assert len(loader) == 2 +# assert sorted(loader) == ["bar", "foo"] +# assert_array_equal(foo, loader["foo"]) +# assert_array_equal(bar, loader["bar"]) +# assert "LazyLoader: " in repr(loader) + + +def test_load_array(memory_store: Store) -> None: + store = memory_store + foo = np.arange(100) + bar = np.arange(100, 0, -1) + save(store, foo=foo, bar=bar) + + # can also load arrays directly into a numpy array + for array_name in ["foo", "bar"]: + array = load(store, path=array_name) + assert isinstance(array, np.ndarray) + if array_name == "foo": + assert_array_equal(foo, array) + else: + assert_array_equal(bar, array) + + +def test_tree() -> None: + g1 = zarr.group() + g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("qux") + g5.create_array("baz", shape=100, chunks=10) + # TODO: complete after tree has been reimplemented + # assert repr(zarr.tree(g1)) == repr(g1.tree()) + # assert str(zarr.tree(g1)) == str(g1.tree()) + + +# @pytest.mark.parametrize("stores_from_path", [False, True]) +# @pytest.mark.parametrize( +# "with_chunk_store,listable", +# [(False, True), (True, True), (False, False)], +# ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], +# ) +# def test_consolidate_metadata(with_chunk_store, listable, monkeypatch, stores_from_path): +# # setup initial data +# if stores_from_path: +# store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, store) +# if with_chunk_store: +# chunk_store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, chunk_store) +# else: +# chunk_store = None +# else: +# store = MemoryStore() +# chunk_store = MemoryStore() if with_chunk_store else None +# path = None +# z = group(store, chunk_store=chunk_store, path=path) + +# # Reload the actual store implementation in case str +# store_to_copy = z.store + +# z.create_group("g1") +# g2 = z.create_group("g2") +# g2.attrs["hello"] = 
"world" +# arr = g2.create_array("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") +# assert 16 == arr.nchunks +# assert 0 == arr.nchunks_initialized +# arr.attrs["data"] = 1 +# arr[:] = 1.0 +# assert 16 == arr.nchunks_initialized + +# if stores_from_path: +# # get the actual store class for use with consolidate_metadata +# store_class = z._store +# else: +# store_class = store + +# # perform consolidation +# out = consolidate_metadata(store_class, path=path) +# assert isinstance(out, Group) +# assert ["g1", "g2"] == list(out) +# if not stores_from_path: +# assert isinstance(out._store, ConsolidatedMetadataStore) +# assert ".zmetadata" in store +# meta_keys = [ +# ".zgroup", +# "g1/.zgroup", +# "g2/.zgroup", +# "g2/.zattrs", +# "g2/arr/.zarray", +# "g2/arr/.zattrs", +# ] + +# for key in meta_keys: +# del store[key] + +# # https://github.com/zarr-developers/zarr-python/issues/993 +# # Make sure we can still open consolidated on an unlistable store: +# if not listable: +# fs_memory = pytest.importorskip("fsspec.implementations.memory") +# monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) +# monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") +# fs = fs_memory.MemoryFileSystem() +# store_to_open = FSStore("", fs=fs) +# # copy original store to new unlistable store +# store_to_open.update(store_to_copy) + +# else: +# store_to_open = store + +# # open consolidated +# z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path) +# assert ["g1", "g2"] == list(z2) +# assert "world" == z2.g2.attrs["hello"] +# assert 1 == z2.g2.arr.attrs["data"] +# assert (z2.g2.arr[:] == 1.0).all() +# assert 16 == z2.g2.arr.nchunks +# if listable: +# assert 16 == z2.g2.arr.nchunks_initialized +# else: +# with pytest.raises(NotImplementedError): +# _ = z2.g2.arr.nchunks_initialized + +# if stores_from_path: +# # path string is note a BaseStore subclass so cannot be used to +# # initialize a ConsolidatedMetadataStore. 
+ +# with pytest.raises(ValueError): +# cmd = ConsolidatedMetadataStore(store) +# else: +# # tests del/write on the store + +# cmd = ConsolidatedMetadataStore(store) +# with pytest.raises(PermissionError): +# del cmd[".zgroup"] +# with pytest.raises(PermissionError): +# cmd[".zgroup"] = None + +# # test getsize on the store +# assert isinstance(getsize(cmd), Integral) + +# # test new metadata are not writeable +# with pytest.raises(PermissionError): +# z2.create_group("g3") +# with pytest.raises(PermissionError): +# z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") +# with pytest.raises(PermissionError): +# del z2["g2"] + +# # test consolidated metadata are not writeable +# with pytest.raises(PermissionError): +# z2.g2.attrs["hello"] = "universe" +# with pytest.raises(PermissionError): +# z2.g2.arr.attrs["foo"] = "bar" + +# # test the data are writeable +# z2.g2.arr[:] = 2 +# assert (z2.g2.arr[:] == 2).all() + +# # test invalid modes +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) + +# # make sure keyword arguments are passed through without error +# open_consolidated( +# store, +# chunk_store=chunk_store, +# path=path, +# cache_attrs=True, +# synchronizer=None, +# ) + + +# @pytest.mark.parametrize( +# "options", +# ( +# {"dimension_separator": "/"}, +# {"dimension_separator": "."}, +# {"dimension_separator": None}, +# ), +# ) +# def test_save_array_separator(tmpdir, options): +# data = np.arange(6).reshape((3, 2)) +# url = tmpdir.join("test.zarr") +# save_array(url, data, **options) + + +# class TestCopyStore(unittest.TestCase): +# _version = 2 + +# def setUp(self): +# source = dict() +# source["foo"] = b"xxx" +# source["bar/baz"] = b"yyy" +# source["bar/qux"] = b"zzz" +# self.source = source + +# def _get_dest_store(self): +# return dict() + +# def test_no_paths(self): +# source = self.source +# dest = self._get_dest_store() +# copy_store(source, dest) +# assert len(source) == len(dest) +# for key in source: +# assert source[key] == dest[key] + +# def test_source_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = key.split("bar/")[1] +# assert source[key] == dest[dest_key] +# else: +# assert key not in dest + +# def test_dest_path(self): +# source = self.source +# # paths should be normalized +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, dest_path=dest_path) +# assert len(source) == len(dest) +# for key in source: +# if self._version == 3: +# dest_key = key[:10] + "new/" + key[10:] +# else: +# dest_key = "new/" + key +# assert source[key] == dest[dest_key] + +# def test_source_dest_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path, dest_path=dest_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = "new/" + key.split("bar/")[1] +# assert 
source[key] == dest[dest_key] +# else: +# assert key not in dest +# assert ("new/" + key) not in dest + +# def test_excludes_includes(self): +# source = self.source + +# # single excludes +# dest = self._get_dest_store() +# excludes = "f.*" +# copy_store(source, dest, excludes=excludes) +# assert len(dest) == 2 + +# root = "" +# assert root + "foo" not in dest + +# # multiple excludes +# dest = self._get_dest_store() +# excludes = "b.z", ".*x" +# copy_store(source, dest, excludes=excludes) +# assert len(dest) == 1 +# assert root + "foo" in dest +# assert root + "bar/baz" not in dest +# assert root + "bar/qux" not in dest + +# # excludes and includes +# dest = self._get_dest_store() +# excludes = "b.*" +# includes = ".*x" +# copy_store(source, dest, excludes=excludes, includes=includes) +# assert len(dest) == 2 +# assert root + "foo" in dest +# assert root + "bar/baz" not in dest +# assert root + "bar/qux" in dest + +# def test_dry_run(self): +# source = self.source +# dest = self._get_dest_store() +# copy_store(source, dest, dry_run=True) +# assert 0 == len(dest) + +# def test_if_exists(self): +# source = self.source +# dest = self._get_dest_store() +# root = "" +# dest[root + "bar/baz"] = b"mmm" + +# # default ('raise') +# with pytest.raises(CopyError): +# copy_store(source, dest) + +# # explicit 'raise' +# with pytest.raises(CopyError): +# copy_store(source, dest, if_exists="raise") + +# # skip +# copy_store(source, dest, if_exists="skip") +# assert 3 == len(dest) +# assert dest[root + "foo"] == b"xxx" +# assert dest[root + "bar/baz"] == b"mmm" +# assert dest[root + "bar/qux"] == b"zzz" + +# # replace +# copy_store(source, dest, if_exists="replace") +# assert 3 == len(dest) +# assert dest[root + "foo"] == b"xxx" +# assert dest[root + "bar/baz"] == b"yyy" +# assert dest[root + "bar/qux"] == b"zzz" + +# # invalid option +# with pytest.raises(ValueError): +# copy_store(source, dest, if_exists="foobar") + + +# def check_copied_array(original, copied, without_attrs=False, expect_props=None): +# # setup +# source_h5py = original.__module__.startswith("h5py.") +# dest_h5py = copied.__module__.startswith("h5py.") +# zarr_to_zarr = not (source_h5py or dest_h5py) +# h5py_to_h5py = source_h5py and dest_h5py +# zarr_to_h5py = not source_h5py and dest_h5py +# h5py_to_zarr = source_h5py and not dest_h5py +# if expect_props is None: +# expect_props = dict() +# else: +# expect_props = expect_props.copy() + +# # common properties in zarr and h5py +# for p in "dtype", "shape", "chunks": +# expect_props.setdefault(p, getattr(original, p)) + +# # zarr-specific properties +# if zarr_to_zarr: +# for p in "compressor", "filters", "order", "fill_value": +# expect_props.setdefault(p, getattr(original, p)) + +# # h5py-specific properties +# if h5py_to_h5py: +# for p in ( +# "maxshape", +# "compression", +# "compression_opts", +# "shuffle", +# "scaleoffset", +# "fletcher32", +# "fillvalue", +# ): +# expect_props.setdefault(p, getattr(original, p)) + +# # common properties with some name differences +# if h5py_to_zarr: +# expect_props.setdefault("fill_value", original.fillvalue) +# if zarr_to_h5py: +# expect_props.setdefault("fillvalue", original.fill_value) + +# # compare properties +# for k, v in expect_props.items(): +# assert v == getattr(copied, k) + +# # compare data +# assert_array_equal(original[:], copied[:]) + +# # compare attrs +# if without_attrs: +# for k in original.attrs.keys(): +# assert k not in copied.attrs +# else: +# if dest_h5py and "filters" in original.attrs: +# # special case in v3 (storing 
filters metadata under attributes) +# # we explicitly do not copy this info over to HDF5 +# original_attrs = original.attrs.asdict().copy() +# original_attrs.pop("filters") +# else: +# original_attrs = original.attrs +# assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) + + +# def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): +# # setup +# if expect_props is None: +# expect_props = dict() +# else: +# expect_props = expect_props.copy() + +# # compare children +# for k, v in original.items(): +# if hasattr(v, "shape"): +# assert k in copied +# check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) +# elif shallow: +# assert k not in copied +# else: +# assert k in copied +# check_copied_group( +# v, +# copied[k], +# without_attrs=without_attrs, +# shallow=shallow, +# expect_props=expect_props, +# ) + +# # compare attrs +# if without_attrs: +# for k in original.attrs.keys(): +# assert k not in copied.attrs +# else: +# assert sorted(original.attrs.items()) == sorted(copied.attrs.items()) + + +# def test_copy_all(): +# """ +# https://github.com/zarr-developers/zarr-python/issues/269 + +# copy_all used to not copy attributes as `.keys()` does not return hidden `.zattrs`. + +# """ +# original_group = zarr.group(store=MemoryStore(), overwrite=True) +# original_group.attrs["info"] = "group attrs" +# original_subgroup = original_group.create_group("subgroup") +# original_subgroup.attrs["info"] = "sub attrs" + +# destination_group = zarr.group(store=MemoryStore(), overwrite=True) + +# # copy from memory to directory store +# copy_all( +# original_group, +# destination_group, +# dry_run=False, +# ) + +# assert "subgroup" in destination_group +# assert destination_group.attrs["info"] == "group attrs" +# assert destination_group.subgroup.attrs["info"] == "sub attrs" + + +# class TestCopy: +# @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) +# def source(self, request, tmpdir): +# def prep_source(source): +# foo = source.create_group("foo") +# foo.attrs["experiment"] = "weird science" +# baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) +# baz.attrs["units"] = "metres" +# if request.param: +# extra_kws = dict( +# compression="gzip", +# compression_opts=3, +# fillvalue=84, +# shuffle=True, +# fletcher32=True, +# ) +# else: +# extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) +# source.create_dataset( +# "spam", +# data=np.arange(100, 200).reshape(20, 5), +# chunks=(10, 2), +# dtype="i2", +# **extra_kws, +# ) +# return source + +# if request.param: +# h5py = pytest.importorskip("h5py") +# fn = tmpdir.join("source.h5") +# with h5py.File(str(fn), mode="w") as h5f: +# yield prep_source(h5f) +# else: +# yield prep_source(group()) + +# @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) +# def dest(self, request, tmpdir): +# if request.param: +# h5py = pytest.importorskip("h5py") +# fn = tmpdir.join("dest.h5") +# with h5py.File(str(fn), mode="w") as h5f: +# yield h5f +# else: +# yield group() + +# def test_copy_array(self, source, dest): +# # copy array with default options +# copy(source["foo/bar/baz"], dest) +# check_copied_array(source["foo/bar/baz"], dest["baz"]) +# copy(source["spam"], dest) +# check_copied_array(source["spam"], dest["spam"]) + +# def test_copy_bad_dest(self, source, dest): +# # try to copy to an array, dest must be a group +# dest = dest.create_dataset("eggs", shape=(100,)) +# with pytest.raises(ValueError): +# 
copy(source["foo/bar/baz"], dest) + +# def test_copy_array_name(self, source, dest): +# # copy array with name +# copy(source["foo/bar/baz"], dest, name="qux") +# assert "baz" not in dest +# check_copied_array(source["foo/bar/baz"], dest["qux"]) + +# def test_copy_array_create_options(self, source, dest): +# dest_h5py = dest.__module__.startswith("h5py.") + +# # copy array, provide creation options +# compressor = Zlib(9) +# create_kws = dict(chunks=(10,)) +# if dest_h5py: +# create_kws.update( +# compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 +# ) +# else: +# create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) +# copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) +# check_copied_array( +# source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws +# ) + +# def test_copy_array_exists_array(self, source, dest): +# # copy array, dest array in the way +# dest.create_dataset("baz", shape=(10,)) + +# # raise +# with pytest.raises(CopyError): +# # should raise by default +# copy(source["foo/bar/baz"], dest) +# assert (10,) == dest["baz"].shape +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest, if_exists="raise") +# assert (10,) == dest["baz"].shape + +# # skip +# copy(source["foo/bar/baz"], dest, if_exists="skip") +# assert (10,) == dest["baz"].shape + +# # replace +# copy(source["foo/bar/baz"], dest, if_exists="replace") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# # invalid option +# with pytest.raises(ValueError): +# copy(source["foo/bar/baz"], dest, if_exists="foobar") + +# def test_copy_array_exists_group(self, source, dest): +# # copy array, dest group in the way +# dest.create_group("baz") + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest) +# assert not hasattr(dest["baz"], "shape") +# with pytest.raises(CopyError): +# copy(source["foo/bar/baz"], dest, if_exists="raise") +# assert not hasattr(dest["baz"], "shape") + +# # skip +# copy(source["foo/bar/baz"], dest, if_exists="skip") +# assert not hasattr(dest["baz"], "shape") + +# # replace +# copy(source["foo/bar/baz"], dest, if_exists="replace") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# def test_copy_array_skip_initialized(self, source, dest): +# dest_h5py = dest.__module__.startswith("h5py.") + +# dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") +# assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) + +# if dest_h5py: +# with pytest.raises(ValueError): +# # not available with copy to h5py +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + +# else: +# # copy array, dest array exists but not yet initialized +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") +# check_copied_array(source["foo/bar/baz"], dest["baz"]) + +# # copy array, dest array exists and initialized, will be skipped +# dest["baz"][:] = np.arange(100, 200) +# copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") +# assert_array_equal(np.arange(100, 200), dest["baz"][:]) +# assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) + +# def test_copy_group(self, source, dest): +# # copy group, default options +# copy(source["foo"], dest) +# check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_no_name(self, source, dest): +# with pytest.raises(TypeError): +# # need a name if copy root +# copy(source, dest) + +# copy(source, dest, name="root") +# check_copied_group(source, dest["root"]) 
+ +# def test_copy_group_options(self, source, dest): +# # copy group, non-default options +# copy(source["foo"], dest, name="qux", without_attrs=True) +# assert "foo" not in dest +# check_copied_group(source["foo"], dest["qux"], without_attrs=True) + +# def test_copy_group_shallow(self, source, dest): +# # copy group, shallow +# copy(source, dest, name="eggs", shallow=True) +# check_copied_group(source, dest["eggs"], shallow=True) + +# def test_copy_group_exists_group(self, source, dest): +# # copy group, dest groups exist +# dest.create_group("foo/bar") +# copy(source["foo"], dest) +# check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_exists_array(self, source, dest): +# # copy group, dest array in the way +# dest.create_dataset("foo/bar", shape=(10,)) + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo"], dest) +# assert dest["foo/bar"].shape == (10,) +# with pytest.raises(CopyError): +# copy(source["foo"], dest, if_exists="raise") +# assert dest["foo/bar"].shape == (10,) + +# # skip +# copy(source["foo"], dest, if_exists="skip") +# assert dest["foo/bar"].shape == (10,) + +# # replace +# copy(source["foo"], dest, if_exists="replace") +# check_copied_group(source["foo"], dest["foo"]) + +# def test_copy_group_dry_run(self, source, dest): +# # dry run, empty destination +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, return_stats=True +# ) +# assert 0 == len(dest) +# assert 3 == n_copied +# assert 0 == n_skipped +# assert 0 == n_bytes_copied + +# # dry run, array exists in destination +# baz = np.arange(100, 200) +# dest.create_dataset("foo/bar/baz", data=baz) +# assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:]) +# assert 1 == len(dest) + +# # raise +# with pytest.raises(CopyError): +# copy(source["foo"], dest, dry_run=True) +# assert 1 == len(dest) + +# # skip +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True +# ) +# assert 1 == len(dest) +# assert 2 == n_copied +# assert 1 == n_skipped +# assert 0 == n_bytes_copied +# assert_array_equal(baz, dest["foo/bar/baz"]) + +# # replace +# n_copied, n_skipped, n_bytes_copied = copy( +# source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True +# ) +# assert 1 == len(dest) +# assert 3 == n_copied +# assert 0 == n_skipped +# assert 0 == n_bytes_copied +# assert_array_equal(baz, dest["foo/bar/baz"]) + +# def test_logging(self, source, dest, tmpdir): +# # callable log +# copy(source["foo"], dest, dry_run=True, log=print) + +# # file name +# fn = str(tmpdir.join("log_name")) +# copy(source["foo"], dest, dry_run=True, log=fn) + +# # file +# with tmpdir.join("log_file").open(mode="w") as f: +# copy(source["foo"], dest, dry_run=True, log=f) + +# # bad option +# with pytest.raises(TypeError): +# copy(source["foo"], dest, dry_run=True, log=True) + + +def test_open_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + with pytest.warns(FutureWarning, match="pass"): + open(store, "w", shape=(1,)) + + +def test_save_array_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message="zarr_version is deprecated", category=DeprecationWarning + ) + with pytest.warns(FutureWarning, match="pass"): + save_array( + store, + np.ones( + 1, + ), + 3, + ) + + +def test_group_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + with pytest.warns(FutureWarning, 
match="pass"): + group(store, True) + + +def test_open_group_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + with pytest.warns(FutureWarning, match="pass"): + open_group(store, "w") + + +def test_open_falls_back_to_open_group() -> None: + # https://github.com/zarr-developers/zarr-python/issues/2309 + store = MemoryStore(mode="w") + zarr.open_group(store, attributes={"key": "value"}) + + group = zarr.open(store) + assert isinstance(group, Group) + assert group.attrs == {"key": "value"} + + +async def test_open_falls_back_to_open_group_async() -> None: + # https://github.com/zarr-developers/zarr-python/issues/2309 + store = MemoryStore(mode="w") + await zarr.api.asynchronous.open_group(store, attributes={"key": "value"}) + + group = await zarr.api.asynchronous.open(store=store) + assert isinstance(group, zarr.core.group.AsyncGroup) + assert group.attrs == {"key": "value"} + + +async def test_metadata_validation_error() -> None: + with pytest.raises( + MetadataValidationError, + match="Invalid value for 'zarr_format'. Expected '2, 3, or None'. Got '3.0'.", + ): + await zarr.api.asynchronous.open_group(zarr_format="3.0") # type: ignore[arg-type] + + with pytest.raises( + MetadataValidationError, + match="Invalid value for 'zarr_format'. Expected '2, 3, or None'. Got '3.0'.", + ): + await zarr.api.asynchronous.open_array(shape=(1,), zarr_format="3.0") # type: ignore[arg-type] diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py new file mode 100644 index 0000000000..b558c826d6 --- /dev/null +++ b/tests/v3/test_array.py @@ -0,0 +1,420 @@ +import pickle +from itertools import accumulate +from typing import Any, Literal + +import numpy as np +import pytest + +import zarr.api.asynchronous +import zarr.storage +from zarr import Array, AsyncArray, Group +from zarr.codecs import BytesCodec, VLenBytesCodec +from zarr.core.array import chunks_initialized +from zarr.core.buffer.cpu import NDBuffer +from zarr.core.common import JSON, ZarrFormat +from zarr.core.group import AsyncGroup +from zarr.core.indexing import ceildiv +from zarr.core.sync import sync +from zarr.errors import ContainsArrayError, ContainsGroupError +from zarr.storage import LocalStore, MemoryStore +from zarr.storage.common import StorePath + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("exists_ok", [True, False]) +@pytest.mark.parametrize("extant_node", ["array", "group"]) +def test_array_creation_existing_node( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + exists_ok: bool, + extant_node: Literal["array", "group"], +) -> None: + """ + Check that an existing array or group is handled as expected during array creation. 
+ """ + spath = StorePath(store) + group = Group.from_store(spath, zarr_format=zarr_format) + expected_exception: type[ContainsArrayError | ContainsGroupError] + if extant_node == "array": + expected_exception = ContainsArrayError + _ = group.create_array("extant", shape=(10,), dtype="uint8") + elif extant_node == "group": + expected_exception = ContainsGroupError + _ = group.create_group("extant") + else: + raise AssertionError + + new_shape = (2, 2) + new_dtype = "float32" + + if exists_ok: + arr_new = Array.create( + spath / "extant", + shape=new_shape, + dtype=new_dtype, + exists_ok=exists_ok, + zarr_format=zarr_format, + ) + assert arr_new.shape == new_shape + assert arr_new.dtype == new_dtype + else: + with pytest.raises(expected_exception): + arr_new = Array.create( + spath / "extant", + shape=new_shape, + dtype=new_dtype, + exists_ok=exists_ok, + zarr_format=zarr_format, + ) + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +async def test_create_creates_parents( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + # prepare a root node, with some data set + await zarr.api.asynchronous.open_group( + store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} + ) + + # create a child node with a couple intermediates + await zarr.api.asynchronous.create( + shape=(2, 2), store=store, path="a/b/c/d", zarr_format=zarr_format + ) + parts = ["a", "a/b", "a/b/c"] + + if zarr_format == 2: + files = [".zattrs", ".zgroup"] + else: + files = ["zarr.json"] + + expected = [f"{part}/{file}" for file in files for part in parts] + + if zarr_format == 2: + expected.extend([".zattrs", ".zgroup", "a/b/c/d/.zarray", "a/b/c/d/.zattrs"]) + else: + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) + + expected = sorted(expected) + + result = sorted([x async for x in store.list_prefix("")]) + + assert result == expected + + paths = ["a", "a/b", "a/b/c"] + for path in paths: + g = await zarr.api.asynchronous.open_group(store=store, path=path) + assert isinstance(g, AsyncGroup) + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_name_properties_no_group( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + arr = Array.create(store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4") + assert arr.path == "" + assert arr.name is None + assert arr.basename is None + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_name_properties_with_group( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + root = Group.from_store(store=store, zarr_format=zarr_format) + foo = root.create_array("foo", shape=(100,), chunks=(10,), dtype="i4") + assert foo.path == "foo" + assert foo.name == "/foo" + assert foo.basename == "foo" + + bar = root.create_group("bar") + spam = bar.create_array("spam", shape=(100,), chunks=(10,), dtype="i4") + + assert spam.path == "bar/spam" + assert spam.name == "/bar/spam" + assert spam.basename == "spam" + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("specifiy_fill_value", [True, False]) +@pytest.mark.parametrize("dtype_str", ["bool", "uint8", "complex64"]) +def test_array_v3_fill_value_default( + store: MemoryStore, specifiy_fill_value: bool, dtype_str: str +) -> None: + """ + 
Test that creating an array with the fill_value parameter set to None, or unspecified, + results in the expected fill_value attribute of the array, i.e. 0 cast to the array's dtype. + """ + shape = (10,) + default_fill_value = 0 + if specifiy_fill_value: + arr = Array.create( + store=store, + shape=shape, + dtype=dtype_str, + zarr_format=3, + chunk_shape=shape, + fill_value=None, + ) + else: + arr = Array.create( + store=store, shape=shape, dtype=dtype_str, zarr_format=3, chunk_shape=shape + ) + + assert arr.fill_value == np.dtype(dtype_str).type(default_fill_value) + assert arr.fill_value.dtype == arr.dtype + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize( + ("dtype_str", "fill_value"), + [("bool", True), ("uint8", 99), ("float32", -99.9), ("complex64", 3 + 4j)], +) +def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str) -> None: + shape = (10,) + arr = Array.create( + store=store, + shape=shape, + dtype=dtype_str, + zarr_format=3, + chunk_shape=shape, + fill_value=fill_value, + ) + + assert arr.fill_value == np.dtype(dtype_str).type(fill_value) + assert arr.fill_value.dtype == arr.dtype + + +def test_create_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + with pytest.warns(FutureWarning, match="Pass"): + Array.create(store, (2, 2), dtype="f8") + + +def test_selection_positional_args_deprecated() -> None: + store = MemoryStore({}, mode="w") + arr = Array.create(store, shape=(2, 2), dtype="f8") + + with pytest.warns(FutureWarning, match="Pass out"): + arr.get_basic_selection(..., NDBuffer(array=np.empty((2, 2)))) + + with pytest.warns(FutureWarning, match="Pass fields"): + arr.set_basic_selection(..., 1, None) + + with pytest.warns(FutureWarning, match="Pass out"): + arr.get_orthogonal_selection(..., NDBuffer(array=np.empty((2, 2)))) + + with pytest.warns(FutureWarning, match="Pass"): + arr.set_orthogonal_selection(..., 1, None) + + with pytest.warns(FutureWarning, match="Pass"): + arr.get_mask_selection(np.zeros((2, 2), dtype=bool), NDBuffer(array=np.empty((0,)))) + + with pytest.warns(FutureWarning, match="Pass"): + arr.set_mask_selection(np.zeros((2, 2), dtype=bool), 1, None) + + with pytest.warns(FutureWarning, match="Pass"): + arr.get_coordinate_selection(([0, 1], [0, 1]), NDBuffer(array=np.empty((2,)))) + + with pytest.warns(FutureWarning, match="Pass"): + arr.set_coordinate_selection(([0, 1], [0, 1]), 1, None) + + with pytest.warns(FutureWarning, match="Pass"): + arr.get_block_selection((0, slice(None)), NDBuffer(array=np.empty((2, 2)))) + + with pytest.warns(FutureWarning, match="Pass"): + arr.set_block_selection((0, slice(None)), 1, None) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +async def test_array_v3_nan_fill_value(store: MemoryStore) -> None: + shape = (10,) + arr = Array.create( + store=store, + shape=shape, + dtype=np.float64, + zarr_format=3, + chunk_shape=shape, + fill_value=np.nan, + ) + arr[:] = np.nan + + assert np.isnan(arr.fill_value) + assert arr.fill_value.dtype == arr.dtype + # all fill value chunk is an empty chunk, and should not be written + assert len([a async for a in store.list_prefix("/")]) == 0 + + +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +async def test_serializable_async_array( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + expected = await AsyncArray.create( + store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" 
+ ) + # await expected.setitems(list(range(100))) + + p = pickle.dumps(expected) + actual = pickle.loads(p) + + assert actual == expected + # np.testing.assert_array_equal(await actual.getitem(slice(None)), await expected.getitem(slice(None))) + # TODO: uncomment the parts of this test that will be impacted by the config/prototype changes in flight + + +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> None: + expected = Array.create( + store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" + ) + expected[:] = list(range(100)) + + p = pickle.dumps(expected) + actual = pickle.loads(p) + + assert actual == expected + np.testing.assert_array_equal(actual[:], expected[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_storage_transformers(store: MemoryStore) -> None: + """ + Test that providing a storage transformer in the array metadata raises a ValueError, since storage transformers are not supported in zarr-python at this time. + """ + metadata_dict: dict[str, JSON] = { + "zarr_format": 3, + "node_type": "array", + "shape": (10,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "uint8", + "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "storage_transformers": ({"test": "should_raise"}), + } + match = "Arrays with storage transformers are not supported in zarr-python at this time." + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) + + +@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) +@pytest.mark.parametrize("nchunks", [2, 5, 10]) +def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> None: + """ + Test that nchunks returns the number of chunks defined for the array. + """ + store = MemoryStore({}, mode="w") + shape = 100 + arr = Array.create(store, shape=(shape,), chunks=(ceildiv(shape, nchunks),), dtype="i4") + expected = nchunks + if test_cls == Array: + observed = arr.nchunks + else: + observed = arr._async_array.nchunks + assert observed == expected + + +@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) +def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: + """ + Test that nchunks_initialized accurately returns the number of stored chunks. + """ + store = MemoryStore({}, mode="w") + arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4") + + # write chunks one at a time + for idx, region in enumerate(arr._iter_chunk_regions()): + arr[region] = 1 + expected = idx + 1 + if test_cls == Array: + observed = arr.nchunks_initialized + else: + observed = arr._async_array.nchunks_initialized + assert observed == expected + + # delete chunks + for idx, key in enumerate(arr._iter_chunk_keys()): + sync(arr.store_path.store.delete(key)) + if test_cls == Array: + observed = arr.nchunks_initialized + else: + observed = arr._async_array.nchunks_initialized + expected = arr.nchunks - idx - 1 + assert observed == expected + + +@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) +def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: + """ + Test that chunks_initialized accurately returns the keys of stored chunks. 
+ """ + store = MemoryStore({}, mode="w") + arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4") + + chunks_accumulated = tuple( + accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys())) + ) + for keys, region in zip(chunks_accumulated, arr._iter_chunk_regions(), strict=False): + arr[region] = 1 + + if test_cls == Array: + observed = sorted(chunks_initialized(arr)) + else: + observed = sorted(chunks_initialized(arr._async_array)) + + expected = sorted(keys) + assert observed == expected + + +def test_default_fill_values() -> None: + a = Array.create(MemoryStore({}, mode="w"), shape=5, chunk_shape=5, dtype=" None: + with pytest.raises(ValueError, match="At least one ArrayBytesCodec is required."): + Array.create(MemoryStore({}, mode="w"), shape=5, chunk_shape=5, dtype=" None: + # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 + store = MemoryStore({}, mode="w") + arr = Array.create(store=store, shape=5, chunk_shape=5, dtype="f8", zarr_format=zarr_format) + arr.attrs["foo"] = "bar" + assert arr.attrs["foo"] == "bar" + + arr2 = zarr.open_array(store=store, zarr_format=zarr_format) + assert arr2.attrs["foo"] == "bar" diff --git a/tests/v3/test_attributes.py b/tests/v3/test_attributes.py new file mode 100644 index 0000000000..12097eb2bc --- /dev/null +++ b/tests/v3/test_attributes.py @@ -0,0 +1,22 @@ +import zarr.core +import zarr.core.attributes +import zarr.storage + + +def test_put() -> None: + store = zarr.storage.MemoryStore({}, mode="w") + attrs = zarr.core.attributes.Attributes( + zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) + ) + attrs.put({"a": 3, "c": 4}) + expected = {"a": 3, "c": 4} + assert dict(attrs) == expected + + +def test_asdict() -> None: + store = zarr.storage.MemoryStore({}, mode="w") + attrs = zarr.core.attributes.Attributes( + zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) + ) + result = attrs.asdict() + assert result == {"a": 1, "b": 2} diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py new file mode 100644 index 0000000000..60816d764e --- /dev/null +++ b/tests/v3/test_buffer.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from zarr import AsyncArray +from zarr.codecs.blosc import BloscCodec +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.gzip import GzipCodec +from zarr.codecs.transpose import TransposeCodec +from zarr.codecs.zstd import ZstdCodec +from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore +from zarr.testing.buffer import ( + NDBufferUsingTestNDArrayLike, + StoreExpectingTestBuffer, + TestBuffer, + TestNDArrayLike, +) +from zarr.testing.utils import gpu_test + +if TYPE_CHECKING: + import types + +try: + import cupy as cp +except ImportError: + cp = None + + +if TYPE_CHECKING: + import types + + +def test_nd_array_like(xp: types.ModuleType) -> None: + ary = xp.arange(10) + assert isinstance(ary, ArrayLike) + assert isinstance(ary, NDArrayLike) + + +@pytest.mark.asyncio +async def test_async_array_prototype() -> None: + """Test the use of a custom buffer prototype""" + + expect = np.zeros((9, 9), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(StoreExpectingTestBuffer(mode="w")) / "test_async_array_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + 
) + expect[1:4, 3:6] = np.ones((3, 3)) + + my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) + + await a.setitem( + selection=(slice(1, 4), slice(3, 6)), + value=np.ones((3, 3)), + prototype=my_prototype, + ) + got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=my_prototype) + # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol + # The test passes, so it clearly does. + assert isinstance(got, TestNDArrayLike) # type: ignore[unreachable] + assert np.array_equal(expect, got) # type: ignore[unreachable] + + +@gpu_test +@pytest.mark.asyncio +async def test_async_array_gpu_prototype() -> None: + """Test the use of the GPU buffer prototype""" + + expect = cp.zeros((9, 9), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(MemoryStore(mode="w")) / "test_async_array_gpu_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + ) + expect[1:4, 3:6] = cp.ones((3, 3)) + + await a.setitem( + selection=(slice(1, 4), slice(3, 6)), + value=cp.ones((3, 3)), + prototype=gpu.buffer_prototype, + ) + got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=gpu.buffer_prototype) + assert isinstance(got, cp.ndarray) + assert cp.array_equal(expect, got) + + +@pytest.mark.asyncio +async def test_codecs_use_of_prototype() -> None: + expect = np.zeros((10, 10), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(StoreExpectingTestBuffer(mode="w")) / "test_codecs_use_of_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + codecs=[ + TransposeCodec(order=(1, 0)), + BytesCodec(), + BloscCodec(), + Crc32cCodec(), + GzipCodec(), + ZstdCodec(), + ], + ) + expect[:] = np.arange(100).reshape(10, 10) + + my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) + + await a.setitem( + selection=(slice(0, 10), slice(0, 10)), + value=expect[:], + prototype=my_prototype, + ) + got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=my_prototype) + # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol + # The test passes, so it clearly does. 
+ assert isinstance(got, TestNDArrayLike) # type: ignore[unreachable] + assert np.array_equal(expect, got) # type: ignore[unreachable] + + +@gpu_test +@pytest.mark.asyncio +async def test_codecs_use_of_gpu_prototype() -> None: + expect = cp.zeros((10, 10), dtype="uint16", order="F") + a = await AsyncArray.create( + StorePath(MemoryStore(mode="w")) / "test_codecs_use_of_gpu_prototype", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + codecs=[ + TransposeCodec(order=(1, 0)), + BytesCodec(), + BloscCodec(), + Crc32cCodec(), + GzipCodec(), + ZstdCodec(), + ], + ) + expect[:] = cp.arange(100).reshape(10, 10) + + await a.setitem( + selection=(slice(0, 10), slice(0, 10)), + value=expect[:], + prototype=gpu.buffer_prototype, + ) + got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=gpu.buffer_prototype) + assert isinstance(got, cp.ndarray) + assert cp.array_equal(expect, got) + + +def test_numpy_buffer_prototype() -> None: + buffer = cpu.buffer_prototype.buffer.create_zero_length() + ndbuffer = cpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) + assert isinstance(buffer.as_array_like(), np.ndarray) + assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray) diff --git a/tests/v3/test_chunk_grids.py b/tests/v3/test_chunk_grids.py new file mode 100644 index 0000000000..4c69c483ae --- /dev/null +++ b/tests/v3/test_chunk_grids.py @@ -0,0 +1,54 @@ +from typing import Any + +import numpy as np +import pytest + +from zarr.core.chunk_grids import _guess_chunks, normalize_chunks + + +@pytest.mark.parametrize( + "shape", [(0,), (0,) * 2, (1, 2, 0, 4, 5), (10, 0), (10,), (100,) * 3, (1000000,), (10000,) * 2] +) +@pytest.mark.parametrize("itemsize", [1, 2, 4]) +def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: + chunks = _guess_chunks(shape, itemsize) + chunk_size = np.prod(chunks) * itemsize + assert isinstance(chunks, tuple) + assert len(chunks) == len(shape) + assert chunk_size < (64 * 1024 * 1024) + # doesn't make any sense to allow chunks to have zero length dimension + assert all(0 < c <= max(s, 1) for c, s in zip(chunks, shape, strict=False)) + + +@pytest.mark.parametrize( + ("chunks", "shape", "typesize", "expected"), + [ + ((10,), (100,), 1, (10,)), + ([10], (100,), 1, (10,)), + (10, (100,), 1, (10,)), + ((10, 10), (100, 10), 1, (10, 10)), + (10, (100, 10), 1, (10, 10)), + ((10, None), (100, 10), 1, (10, 10)), + (30, (100, 20, 10), 1, (30, 30, 30)), + ((30,), (100, 20, 10), 1, (30, 20, 10)), + ((30, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), + # auto chunking + (None, (100,), 1, (100,)), + (-1, (100,), 1, (100,)), + ((30, -1, None), (100, 20, 10), 1, (30, 20, 10)), + ], +) +def test_normalize_chunks( + chunks: Any, shape: tuple[int, ...], typesize: int, expected: tuple[int, ...] 
+) -> None: + assert expected == normalize_chunks(chunks, shape, typesize) + + +def test_normalize_chunks_errors() -> None: + with pytest.raises(ValueError): + normalize_chunks("foo", (100,), 1) + with pytest.raises(ValueError): + normalize_chunks((100, 10), (100,), 1) diff --git a/tests/v3/test_codec_entrypoints.py b/tests/v3/test_codec_entrypoints.py new file mode 100644 index 0000000000..e1ef027dd4 --- /dev/null +++ b/tests/v3/test_codec_entrypoints.py @@ -0,0 +1,50 @@ +import os.path +import sys +from collections.abc import Generator + +import pytest + +import zarr.registry +from zarr import config + +here = os.path.abspath(os.path.dirname(__file__)) + + +@pytest.fixture +def set_path() -> Generator[None, None, None]: + sys.path.append(here) + zarr.registry._collect_entrypoints() + yield + sys.path.remove(here) + registries = zarr.registry._collect_entrypoints() + for registry in registries: + registry.lazy_load_list.clear() + config.reset() + + +@pytest.mark.usefixtures("set_path") +@pytest.mark.parametrize("codec_name", ["TestEntrypointCodec", "TestEntrypointGroup.Codec"]) +def test_entrypoint_codec(codec_name: str) -> None: + config.set({"codecs.test": "package_with_entrypoint." + codec_name}) + cls_test = zarr.registry.get_codec_class("test") + assert cls_test.__qualname__ == codec_name + + +@pytest.mark.usefixtures("set_path") +def test_entrypoint_pipeline() -> None: + config.set({"codec_pipeline.path": "package_with_entrypoint.TestEntrypointCodecPipeline"}) + cls = zarr.registry.get_pipeline_class() + assert cls.__name__ == "TestEntrypointCodecPipeline" + + +@pytest.mark.usefixtures("set_path") +@pytest.mark.parametrize("buffer_name", ["TestEntrypointBuffer", "TestEntrypointGroup.Buffer"]) +def test_entrypoint_buffer(buffer_name: str) -> None: + config.set( + { + "buffer": "package_with_entrypoint." 
+ buffer_name, + "ndbuffer": "package_with_entrypoint.TestEntrypointNDBuffer", + } + ) + assert zarr.registry.get_buffer_class().__qualname__ == buffer_name + assert zarr.registry.get_ndbuffer_class().__name__ == "TestEntrypointNDBuffer" diff --git a/tests/v3/test_codecs/__init__.py b/tests/v3/test_codecs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py new file mode 100644 index 0000000000..416a2f784e --- /dev/null +++ b/tests/v3/test_codecs/test_blosc.py @@ -0,0 +1,55 @@ +import json + +import numpy as np +import pytest + +from zarr import AsyncArray +from zarr.abc.store import Store +from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec +from zarr.core.buffer import default_buffer_prototype +from zarr.storage.common import StorePath + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("dtype", ["uint8", "uint16"]) +async def test_blosc_evolve(store: Store, dtype: str) -> None: + typesize = np.dtype(dtype).itemsize + path = "blosc_evolve" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, + fill_value=0, + codecs=[BytesCodec(), BloscCodec()], + ) + buf = await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype()) + assert buf is not None + zarr_json = json.loads(buf.to_bytes()) + blosc_configuration_json = zarr_json["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" + + path2 = "blosc_evolve_sharding" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, + fill_value=0, + codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], + ) + buf = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) + assert buf is not None + zarr_json = json.loads(buf.to_bytes()) + blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py new file mode 100644 index 0000000000..7a5fb979a1 --- /dev/null +++ b/tests/v3/test_codecs/test_codecs.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from zarr import Array, AsyncArray, config +from zarr.codecs import ( + BytesCodec, + GzipCodec, + ShardingCodec, + TransposeCodec, +) +from zarr.core.buffer import default_buffer_prototype +from zarr.core.indexing import Selection, morton_order_iter +from zarr.storage import StorePath + +if TYPE_CHECKING: + from zarr.abc.codec import Codec + from zarr.abc.store import Store + from zarr.core.buffer.core import NDArrayLike + from zarr.core.common import MemoryOrder + + +@dataclass(frozen=True) +class _AsyncArrayProxy: + array: AsyncArray + + def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: + return _AsyncArraySelectionProxy(self.array, selection) + + +@dataclass(frozen=True) +class 
_AsyncArraySelectionProxy: + array: AsyncArray + selection: Selection + + async def get(self) -> NDArrayLike: + return await self.array.getitem(self.selection) + + async def set(self, value: np.ndarray) -> None: + return await self.array.setitem(self.selection, value) + + +def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: + if order == "F": + return tuple(ndim - x - 1 for x in range(ndim)) + else: + return tuple(range(ndim)) + + +def test_sharding_pickle() -> None: + """ + Test that sharding codecs can be pickled + """ + pass + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("store_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +async def test_order( + store: Store, + input_order: MemoryOrder, + store_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) + path = "order" + spath = StorePath(store, path=path) + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(16, 8), + codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] + ) + + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +def test_order_implicit( + store: Store, + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) + path = "order_implicit" + spath = StorePath(store, path) + codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None + + with config.set({"array.order": runtime_write_order}): + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=codecs_, + ) + + a[:, :] = data + + with config.set({"array.order": runtime_read_order}): + a = Array.open(spath) + read_data = a[:, :] + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] 
+ assert read_data.flags["C_CONTIGUOUS"] + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_open(store: Store) -> None: + spath = StorePath(store) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +def test_morton() -> None: + assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] + assert list(morton_order_iter((2, 2, 2))) == [ + (0, 0, 0), + (1, 0, 0), + (0, 1, 0), + (1, 1, 0), + (0, 0, 1), + (1, 0, 1), + (0, 1, 1), + (1, 1, 1), + ] + assert list(morton_order_iter((2, 2, 2, 2))) == [ + (0, 0, 0, 0), + (1, 0, 0, 0), + (0, 1, 0, 0), + (1, 1, 0, 0), + (0, 0, 1, 0), + (1, 0, 1, 0), + (0, 1, 1, 0), + (1, 1, 1, 0), + (0, 0, 0, 1), + (1, 0, 0, 1), + (0, 1, 0, 1), + (1, 1, 0, 1), + (0, 0, 1, 1), + (1, 0, 1, 1), + (0, 1, 1, 1), + (1, 1, 1, 1), + ] + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_write_partial_chunks(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +async def test_delete_empty_chunks(store: Store) -> None: + data = np.ones((16, 16)) + path = "delete_empty_chunks" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 32), + dtype=data.dtype, + fill_value=1, + ) + await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[:16, :16].set(data) + assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) + assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +async def test_dimension_names(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "dimension_names" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + dimension_names=("x", "y"), + ) + + assert (await AsyncArray.open(spath)).metadata.dimension_names == ( + "x", + "y", + ) + path2 = "dimension_names2" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + ) + + assert (await AsyncArray.open(spath2)).metadata.dimension_names is None + zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) + assert zarr_json_buffer is not None + assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_invalid_metadata(store: Store) -> None: + spath2 = StorePath(store, "invalid_endian") + with pytest.raises(TypeError): + Array.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + BytesCodec(endian="big"), + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath3 = StorePath(store, "invalid_order") + with pytest.raises(TypeError): + Array.create( + spath3, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + BytesCodec(), 
+ TransposeCodec(order="F"), # type: ignore[arg-type] + ], + ) + spath4 = StorePath(store, "invalid_missing_bytes_codec") + with pytest.raises(ValueError): + Array.create( + spath4, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath5 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath5, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8,)), + ], + ) + spath6 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath6, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 7)), + ], + ) + spath7 = StorePath(store, "warning_inefficient_codecs") + with pytest.warns(UserWarning): + Array.create( + spath7, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 8)), + GzipCodec(), + ], + ) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +async def test_resize(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16") + path = "resize" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None + + a = await a.resize((10, 12)) + assert a.metadata.shape == (10, 12) + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py new file mode 100644 index 0000000000..db4e77451c --- /dev/null +++ b/tests/v3/test_codecs/test_endian.py @@ -0,0 +1,58 @@ +from typing import Literal + +import numpy as np +import pytest + +from zarr import AsyncArray +from zarr.abc.store import Store +from zarr.codecs import BytesCodec +from zarr.storage.common import StorePath + +from .test_codecs import _AsyncArrayProxy + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("endian", ["big", "little"]) +async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) 
+@pytest.mark.parametrize("dtype_input_endian", [">u2", "u2", " None: + data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype="uint16", + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=dtype_store_endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) diff --git a/tests/v3/test_codecs/test_gzip.py b/tests/v3/test_codecs/test_gzip.py new file mode 100644 index 0000000000..7b4d231813 --- /dev/null +++ b/tests/v3/test_codecs/test_gzip.py @@ -0,0 +1,24 @@ +import numpy as np +import pytest + +from zarr import Array +from zarr.abc.store import Store +from zarr.codecs import BytesCodec, GzipCodec +from zarr.storage.common import StorePath + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_gzip(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), GzipCodec()], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py new file mode 100644 index 0000000000..c0dcfbf350 --- /dev/null +++ b/tests/v3/test_codecs/test_sharding.py @@ -0,0 +1,332 @@ +import pickle +from typing import Any + +import numpy as np +import numpy.typing as npt +import pytest + +from zarr import Array, AsyncArray +from zarr.abc.store import Store +from zarr.codecs import ( + BloscCodec, + BytesCodec, + ShardingCodec, + ShardingCodecIndexLocation, + TransposeCodec, +) +from zarr.core.buffer import default_buffer_prototype +from zarr.storage.common import StorePath + +from ..conftest import ArrayRequest +from .test_codecs import _AsyncArrayProxy, order_from_dim + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("offset", [0, 10]) +def test_sharding( + store: Store, + array_fixture: npt.NDArray[Any], + index_location: ShardingCodecIndexLocation, + offset: int, +) -> None: + """ + Test that we can create an array with a sharding codec, write data to that array, and get + the same data out via indexing. 
+ """ + data = array_fixture + spath = StorePath(store) + arr = Array.create( + spath, + shape=tuple(s + offset for s in data.shape), + chunk_shape=(64,) * data.ndim, + dtype=data.dtype, + fill_value=6, + codecs=[ + ShardingCodec( + chunk_shape=(32,) * data.ndim, + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + write_region = tuple(slice(offset, None) for dim in range(data.ndim)) + arr[write_region] = data + + if offset > 0: + empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) + assert np.all(arr[empty_region] == arr.metadata.fill_value) + + read_data = arr[write_region] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +def test_sharding_partial( + store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + a[10:, 10:, 10:] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 0) + + read_data = a[10:, 10:, 10:] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_partial_read( + store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 1) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_partial_overwrite( + store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture[:10, :10, :10] + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + 
index_location=index_location, + ) + ], + ) + + a[:10, :10, :10] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + data = data + 10 + a[:10, :10, :10] = data + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize( + "outer_index_location", + ["start", "end"], +) +@pytest.mark.parametrize( + "inner_index_location", + ["start", "end"], +) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_nested_sharding( + store: Store, + array_fixture: npt.NDArray[Any], + outer_index_location: ShardingCodecIndexLocation, + inner_index_location: ShardingCodecIndexLocation, +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) + ], + index_location=outer_index_location, + ) + ], + ) + + a[:, :, :] = data + + read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_open_sharding(store: Store) -> None: + path = "open_sharding" + spath = StorePath(store, path) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(8, 8), + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_write_partial_sharded_chunks(store: Store) -> None: + data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=(40, 40), + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(10, 10), + codecs=[ + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +async def test_delete_empty_shards(store: Store) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + path = "delete_empty_shards" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(8, 16), + dtype="uint16", + fill_value=1, + codecs=[ShardingCodec(chunk_shape=(8, 8))], + ) + await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) + await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) + # chunk (0, 0) is full + # chunks (0, 1), (1, 0), (1, 1) are empty + # shard (0, 0) is half-full + # shard (1, 0) is empty + + data = np.ones((16, 16), dtype="uint16") + data[:8, :8] = 0 + assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) + assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None + chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) + assert chunk_bytes is not None + assert 
len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 + + +def test_pickle() -> None: + codec = ShardingCodec(chunk_shape=(8, 8)) + assert pickle.loads(pickle.dumps(codec)) == codec diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py new file mode 100644 index 0000000000..2b3914150e --- /dev/null +++ b/tests/v3/test_codecs/test_transpose.py @@ -0,0 +1,107 @@ +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from zarr import Array, AsyncArray, config +from zarr.abc.store import Store +from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec +from zarr.core.common import MemoryOrder +from zarr.storage.common import StorePath + +from .test_codecs import _AsyncArrayProxy + +if TYPE_CHECKING: + from zarr.abc.codec import Codec + + +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +async def test_transpose( + store: Store, + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) + spath = StorePath(store, path="transpose") + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(1, 16, 8), + codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] + ) + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) +def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: + shape = [i + 3 for i in range(len(order))] + data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) + spath = StorePath(store, "transpose_non_self_inverse") + a = Array.create( + spath, + shape=data.shape, + chunk_shape=data.shape, + dtype=data.dtype, + fill_value=0, + codecs=[TransposeCodec(order=order), BytesCodec()], + ) + a[:, :] = data + read_data = a[:, :] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_transpose_invalid( + store: Store, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) + spath = StorePath(store, "transpose_invalid") + for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: + with pytest.raises(ValueError): + Array.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + 
codecs=[TransposeCodec(order=order), BytesCodec()], + ) diff --git a/tests/v3/test_codecs/test_vlen.py b/tests/v3/test_codecs/test_vlen.py new file mode 100644 index 0000000000..aaea5dab83 --- /dev/null +++ b/tests/v3/test_codecs/test_vlen.py @@ -0,0 +1,95 @@ +from typing import Any + +import numpy as np +import pytest + +from zarr import Array +from zarr.abc.codec import Codec +from zarr.abc.store import Store +from zarr.codecs import VLenBytesCodec, VLenUTF8Codec, ZstdCodec +from zarr.core.metadata.v3 import ArrayV3Metadata, DataType +from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING +from zarr.storage.common import StorePath + +numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType] +expected_zarr_string_dtype: np.dtype[Any] +if _NUMPY_SUPPORTS_VLEN_STRING: + numpy_str_dtypes.append(np.dtypes.StringDType) + expected_zarr_string_dtype = np.dtypes.StringDType() +else: + expected_zarr_string_dtype = np.dtype("O") + + +@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) +@pytest.mark.parametrize("dtype", numpy_str_dtypes) +@pytest.mark.parametrize("as_object_array", [False, True]) +@pytest.mark.parametrize("codecs", [None, [VLenUTF8Codec()], [VLenUTF8Codec(), ZstdCodec()]]) +def test_vlen_string( + store: Store, dtype: None | np.dtype[Any], as_object_array: bool, codecs: None | list[Codec] +) -> None: + strings = ["hello", "world", "this", "is", "a", "test"] + data = np.array(strings, dtype=dtype).reshape((2, 3)) + + sp = StorePath(store, path="string") + a = Array.create( + sp, + shape=data.shape, + chunk_shape=data.shape, + dtype=data.dtype, + fill_value="", + codecs=codecs, + ) + assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy + + # should also work if input array is an object array, provided we explicitly specified + # a stringlike dtype when creating the Array + if as_object_array: + data = data.astype("O") + + a[:, :] = data + assert np.array_equal(data, a[:, :]) + assert a.metadata.data_type == DataType.string + assert a.dtype == expected_zarr_string_dtype + + # test round trip + b = Array.open(sp) + assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy + assert np.array_equal(data, b[:, :]) + assert b.metadata.data_type == DataType.string + assert a.dtype == expected_zarr_string_dtype + + +@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) +@pytest.mark.parametrize("as_object_array", [False, True]) +@pytest.mark.parametrize("codecs", [None, [VLenBytesCodec()], [VLenBytesCodec(), ZstdCodec()]]) +def test_vlen_bytes(store: Store, as_object_array: bool, codecs: None | list[Codec]) -> None: + bstrings = [b"hello", b"world", b"this", b"is", b"a", b"test"] + data = np.array(bstrings).reshape((2, 3)) + assert data.dtype == "|S5" + + sp = StorePath(store, path="string") + a = Array.create( + sp, + shape=data.shape, + chunk_shape=data.shape, + dtype=data.dtype, + fill_value=b"", + codecs=codecs, + ) + assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy + + # should also work if input array is an object array, provided we explicitly specified + # a bytesting-like dtype when creating the Array + if as_object_array: + data = data.astype("O") + a[:, :] = data + assert np.array_equal(data, a[:, :]) + assert a.metadata.data_type == DataType.bytes + assert a.dtype == "O" + + # test round trip + b = Array.open(sp) + assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy + assert np.array_equal(data, b[:, :]) + assert b.metadata.data_type == DataType.bytes + assert 
a.dtype == "O" diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py new file mode 100644 index 0000000000..29efc29466 --- /dev/null +++ b/tests/v3/test_codecs/test_zstd.py @@ -0,0 +1,25 @@ +import numpy as np +import pytest + +from zarr import Array +from zarr.abc.store import Store +from zarr.codecs import BytesCodec, ZstdCodec +from zarr.storage.common import StorePath + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("checksum", [True, False]) +def test_zstd(store: Store, checksum: bool) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store, path="zstd"), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) diff --git a/tests/v3/test_common.py b/tests/v3/test_common.py new file mode 100644 index 0000000000..c28723d1a8 --- /dev/null +++ b/tests/v3/test_common.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import Any, Literal + +import numpy as np +import pytest + +from zarr.core.common import parse_name, parse_shapelike, product +from zarr.core.config import parse_indexing_order + + +@pytest.mark.parametrize("data", [(0, 0, 0, 0), (1, 3, 4, 5, 6), (2, 4)]) +def test_product(data: tuple[int, ...]) -> None: + assert product(data) == np.prod(data) + + +# todo: test +def test_concurrent_map() -> None: ... + + +# todo: test +def test_to_thread() -> None: ... + + +# todo: test +def test_enum_names() -> None: ... + + +# todo: test +def test_parse_enum() -> None: ... + + +@pytest.mark.parametrize("data", [("foo", "bar"), (10, 11)]) +def test_parse_name_invalid(data: tuple[Any, Any]) -> None: + observed, expected = data + if isinstance(observed, str): + with pytest.raises(ValueError, match=f"Expected '{expected}'. Got {observed} instead."): + parse_name(observed, expected) + else: + with pytest.raises( + TypeError, match=f"Expected a string, got an instance of {type(observed)}." + ): + parse_name(observed, expected) + + +@pytest.mark.parametrize("data", [("foo", "foo"), ("10", "10")]) +def test_parse_name_valid(data: tuple[Any, Any]) -> None: + observed, expected = data + assert parse_name(observed, expected) == observed + + +@pytest.mark.parametrize("data", [0, 1, "hello", "f"]) +def test_parse_indexing_order_invalid(data: Any) -> None: + with pytest.raises(ValueError, match="Expected one of"): + parse_indexing_order(data) + + +@pytest.mark.parametrize("data", ["C", "F"]) +def parse_indexing_order_valid(data: Literal["C", "F"]) -> None: + assert parse_indexing_order(data) == data + + +@pytest.mark.parametrize("data", [lambda v: v, slice(None)]) +def test_parse_shapelike_invalid_single_type(data: Any) -> None: + """ + Test that we get the expected error message when passing in a value that is not an integer + or an iterable of integers. + """ + with pytest.raises(TypeError, match="Expected an integer or an iterable of integers."): + parse_shapelike(data) + + +def test_parse_shapelike_invalid_single_value() -> None: + """ + Test that we get the expected error message when passing in a negative integer. 
+ """ + with pytest.raises(ValueError, match="Expected a non-negative integer."): + parse_shapelike(-1) + + +@pytest.mark.parametrize("data", ["shape", ("0", 1, 2, 3), {"0": "0"}, ((1, 2), (2, 2)), (4.0, 2)]) +def test_parse_shapelike_invalid_iterable_types(data: Any) -> None: + """ + Test that we get the expected error message when passing in an iterable containing + non-integer elements + """ + with pytest.raises(TypeError, match="Expected an iterable of integers"): + parse_shapelike(data) + + +@pytest.mark.parametrize("data", [(1, 2, 3, -1), (-10,)]) +def test_parse_shapelike_invalid_iterable_values(data: Any) -> None: + """ + Test that we get the expected error message when passing in an iterable containing negative + integers + """ + with pytest.raises(ValueError, match="Expected all values to be non-negative."): + parse_shapelike(data) + + +@pytest.mark.parametrize("data", [range(10), [0, 1, 2, 3], (3, 4, 5), ()]) +def test_parse_shapelike_valid(data: Iterable[int]) -> None: + assert parse_shapelike(data) == tuple(data) + + +# todo: more dtypes +@pytest.mark.parametrize("data", [("uint8", np.uint8), ("float64", np.float64)]) +def parse_dtype(data: tuple[str, np.dtype[Any]]) -> None: + unparsed, parsed = data + assert parse_dtype(unparsed) == parsed + + +# todo: figure out what it means to test this +def test_parse_fill_value() -> None: ... diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py new file mode 100644 index 0000000000..62907588c7 --- /dev/null +++ b/tests/v3/test_config.py @@ -0,0 +1,233 @@ +import os +from collections.abc import Iterable +from typing import Any +from unittest import mock +from unittest.mock import Mock + +import numpy as np +import pytest + +import zarr +from zarr import Array, zeros +from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline +from zarr.abc.store import ByteSetter, Store +from zarr.codecs import BatchedCodecPipeline, BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec +from zarr.core.array_spec import ArraySpec +from zarr.core.buffer import NDBuffer +from zarr.core.config import BadConfigError, config +from zarr.core.indexing import SelectorTuple +from zarr.registry import ( + fully_qualified_name, + get_buffer_class, + get_codec_class, + get_ndbuffer_class, + get_pipeline_class, + register_buffer, + register_codec, + register_ndbuffer, + register_pipeline, +) +from zarr.testing.buffer import ( + NDBufferUsingTestNDArrayLike, + StoreExpectingTestBuffer, + TestBuffer, + TestNDArrayLike, +) + + +def test_config_defaults_set() -> None: + # regression test for available defaults + assert config.defaults == [ + { + "default_zarr_version": 3, + "array": {"order": "C"}, + "async": {"concurrency": 10, "timeout": None}, + "threading": {"max_workers": None}, + "json_indent": 2, + "codec_pipeline": { + "path": "zarr.codecs.pipeline.BatchedCodecPipeline", + "batch_size": 1, + }, + "buffer": "zarr.core.buffer.cpu.Buffer", + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + "codecs": { + "blosc": "zarr.codecs.blosc.BloscCodec", + "gzip": "zarr.codecs.gzip.GzipCodec", + "zstd": "zarr.codecs.zstd.ZstdCodec", + "bytes": "zarr.codecs.bytes.BytesCodec", + "endian": "zarr.codecs.bytes.BytesCodec", + "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", + "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", + "transpose": "zarr.codecs.transpose.TransposeCodec", + "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", + "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", + }, + } + ] + assert config.get("array.order") == "C" + assert 
config.get("async.concurrency") == 10 + assert config.get("async.timeout") is None + assert config.get("codec_pipeline.batch_size") == 1 + assert config.get("json_indent") == 2 + + +@pytest.mark.parametrize( + ("key", "old_val", "new_val"), + [("array.order", "C", "F"), ("async.concurrency", 10, 20), ("json_indent", 2, 0)], +) +def test_config_defaults_can_be_overridden(key: str, old_val: Any, new_val: Any) -> None: + assert config.get(key) == old_val + with config.set({key: new_val}): + assert config.get(key) == new_val + + +def test_fully_qualified_name() -> None: + class MockClass: + pass + + assert ( + fully_qualified_name(MockClass) + == "tests.v3.test_config.test_fully_qualified_name..MockClass" + ) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_config_codec_pipeline_class(store: Store) -> None: + # has default value + assert get_pipeline_class().__name__ != "" + + config.set({"codec_pipeline.name": "zarr.codecs.pipeline.BatchedCodecPipeline"}) + assert get_pipeline_class() == zarr.codecs.pipeline.BatchedCodecPipeline + + _mock = Mock() + + class MockCodecPipeline(BatchedCodecPipeline): + async def write( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + _mock.call() + + register_pipeline(MockCodecPipeline) + config.set({"codec_pipeline.path": fully_qualified_name(MockCodecPipeline)}) + + assert get_pipeline_class() == MockCodecPipeline + + # test if codec is used + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + ) + arr[:] = range(100) + + _mock.call.assert_called() + + with pytest.raises(BadConfigError): + config.set({"codec_pipeline.path": "wrong_name"}) + get_pipeline_class() + + class MockEnvCodecPipeline(CodecPipeline): + pass + + register_pipeline(MockEnvCodecPipeline) + + with mock.patch.dict( + os.environ, {"ZARR_CODEC_PIPELINE__PATH": fully_qualified_name(MockEnvCodecPipeline)} + ): + assert get_pipeline_class(reload_config=True) == MockEnvCodecPipeline + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_config_codec_implementation(store: Store) -> None: + # has default value + assert fully_qualified_name(get_codec_class("blosc")) == config.defaults[0]["codecs"]["blosc"] + + _mock = Mock() + + class MockBloscCodec(BloscCodec): + async def _encode_single( + self, chunk_data: CodecInput, chunk_spec: ArraySpec + ) -> CodecOutput | None: + _mock.call() + + config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}) + register_codec("blosc", MockBloscCodec) + assert get_codec_class("blosc") == MockBloscCodec + + # test if codec is used + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + codecs=[BytesCodec(), {"name": "blosc", "configuration": {}}], + ) + arr[:] = range(100) + _mock.call.assert_called() + + with mock.patch.dict(os.environ, {"ZARR_CODECS__BLOSC": fully_qualified_name(BloscCodec)}): + assert get_codec_class("blosc", reload_config=True) == BloscCodec + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_config_ndbuffer_implementation(store: Store) -> None: + # has default value + assert fully_qualified_name(get_ndbuffer_class()) == config.defaults[0]["ndbuffer"] + + # set custom ndbuffer with TestNDArrayLike implementation + register_ndbuffer(NDBufferUsingTestNDArrayLike) + config.set({"ndbuffer": 
fully_qualified_name(NDBufferUsingTestNDArrayLike)}) + assert get_ndbuffer_class() == NDBufferUsingTestNDArrayLike + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + ) + got = arr[:] + print(type(got)) + assert isinstance(got, TestNDArrayLike) + + +def test_config_buffer_implementation() -> None: + # has default value + assert fully_qualified_name(get_buffer_class()) == config.defaults[0]["buffer"] + + arr = zeros(shape=(100), store=StoreExpectingTestBuffer(mode="w")) + + # AssertionError of StoreExpectingTestBuffer when not using my buffer + with pytest.raises(AssertionError): + arr[:] = np.arange(100) + + register_buffer(TestBuffer) + config.set({"buffer": fully_qualified_name(TestBuffer)}) + assert get_buffer_class() == TestBuffer + + # no error using TestBuffer + data = np.arange(100) + arr[:] = np.arange(100) + assert np.array_equal(arr[:], data) + + data2d = np.arange(1000).reshape(100, 10) + arr_sharding = zeros( + shape=(100, 10), + store=StoreExpectingTestBuffer(mode="w"), + codecs=[ShardingCodec(chunk_shape=(10, 10))], + ) + arr_sharding[:] = data2d + assert np.array_equal(arr_sharding[:], data2d) + + arr_Crc32c = zeros( + shape=(100, 10), + store=StoreExpectingTestBuffer(mode="w"), + codecs=[BytesCodec(), Crc32cCodec()], + ) + arr_Crc32c[:] = data2d + assert np.array_equal(arr_Crc32c[:], data2d) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py new file mode 100644 index 0000000000..20960f0346 --- /dev/null +++ b/tests/v3/test_group.py @@ -0,0 +1,1287 @@ +from __future__ import annotations + +import contextlib +import pickle +import warnings +from typing import TYPE_CHECKING, Any, Literal, cast + +import numpy as np +import pytest + +import zarr +import zarr.api.asynchronous +import zarr.api.synchronous +from zarr import Array, AsyncArray, AsyncGroup, Group +from zarr.abc.store import Store +from zarr.core.buffer import default_buffer_prototype +from zarr.core.common import JSON, ZarrFormat +from zarr.core.group import ConsolidatedMetadata, GroupMetadata +from zarr.core.sync import sync +from zarr.errors import ContainsArrayError, ContainsGroupError +from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore +from zarr.storage.common import make_store_path + +from .conftest import parse_store + +if TYPE_CHECKING: + from _pytest.compat import LEGACY_PATH + + +@pytest.fixture(params=["local", "memory", "zip"]) +async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: + result = await parse_store(request.param, str(tmpdir)) + if not isinstance(result, Store): + raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") + return result + + +@pytest.fixture(params=[True, False]) +def exists_ok(request: pytest.FixtureRequest) -> bool: + result = request.param + if not isinstance(result, bool): + raise TypeError("Wrong type returned by test fixture.") + return result + + +@pytest.fixture(params=[2, 3], ids=["zarr2", "zarr3"]) +def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: + result = request.param + if result not in (2, 3): + raise ValueError("Wrong value returned from test fixture.") + return cast(ZarrFormat, result) + + +def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test that initializing a group from an asyncgroup works. 
+ """ + agroup = sync(AsyncGroup.from_store(store=store, zarr_format=zarr_format)) + group = Group(agroup) + assert group._async_group == agroup + + +async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> None: + # prepare a root node, with some data set + await zarr.api.asynchronous.open_group( + store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} + ) + objs = {x async for x in store.list()} + if zarr_format == 2: + assert objs == {".zgroup", ".zattrs", "a/.zgroup", "a/.zattrs"} + else: + assert objs == {"zarr.json", "a/zarr.json"} + + # test that root group node was created + root = await zarr.api.asynchronous.open_group( + store=store, + ) + agroup = await root.getitem("a") + assert agroup.attrs == {"key": "value"} + + # create a child node with a couple intermediates + await zarr.api.asynchronous.open_group(store=store, path="a/b/c/d", zarr_format=zarr_format) + parts = ["a", "a/b", "a/b/c"] + + if zarr_format == 2: + files = [".zattrs", ".zgroup"] + else: + files = ["zarr.json"] + + expected = [f"{part}/{file}" for file in files for part in parts] + + if zarr_format == 2: + expected.extend([".zgroup", ".zattrs", "a/b/c/d/.zgroup", "a/b/c/d/.zattrs"]) + else: + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) + + expected = sorted(expected) + + result = sorted([x async for x in store.list_prefix("")]) + + assert result == expected + + paths = ["a", "a/b", "a/b/c"] + for path in paths: + g = await zarr.api.asynchronous.open_group(store=store, path=path) + assert isinstance(g, AsyncGroup) + + if path == "a": + # ensure we didn't overwrite the root attributes + assert g.attrs == {"key": "value"} + else: + assert g.attrs == {} + + +def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test basic properties of groups + """ + root = Group.from_store(store=store, zarr_format=zarr_format) + assert root.path == "" + assert root.name == "/" + assert root.basename == "" + + foo = root.create_group("foo") + assert foo.path == "foo" + assert foo.name == "/foo" + assert foo.basename == "foo" + + bar = root.create_group("foo/bar") + assert bar.path == "foo/bar" + assert bar.name == "/foo/bar" + assert bar.basename == "bar" + + +@pytest.mark.parametrize("consolidated_metadata", [True, False]) +def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metadata: bool) -> None: + """ + Test that `Group.members` returns correct values, i.e. the arrays and groups + (explicit and implicit) contained in that group. + """ + # group/ + # subgroup/ + # subsubgroup/ + # subsubsubgroup + # subarray + + path = "group" + group = Group.from_store( + store=store, + zarr_format=zarr_format, + ) + members_expected: dict[str, Array | Group] = {} + + members_expected["subgroup"] = group.create_group("subgroup") + # make a sub-sub-subgroup, to ensure that the children calculation doesn't go + # too deep in the hierarchy + subsubgroup = members_expected["subgroup"].create_group("subsubgroup") + subsubsubgroup = subsubgroup.create_group("subsubsubgroup") + + members_expected["subarray"] = group.create_array( + "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True + ) + + # add an extra object to the domain of the group. + # the list of children should ignore this object. + sync( + store.set( + f"{path}/extra_object-1", + default_buffer_prototype().buffer.from_bytes(b"000000"), + ) + ) + # add an extra object under a directory-like prefix in the domain of the group. 
+ # this creates a directory with a random key in it + # this should not show up as a member + sync( + store.set( + f"{path}/extra_directory/extra_object-2", + default_buffer_prototype().buffer.from_bytes(b"000000"), + ) + ) + + # this warning shows up when extra objects show up in the hierarchy + warn_context = pytest.warns( + UserWarning, match=r"Object at .* is not recognized as a component of a Zarr hierarchy." + ) + if consolidated_metadata: + with warn_context: + zarr.consolidate_metadata(store=store, zarr_format=zarr_format) + # now that we've consolidated the store, we shouldn't get the warnings from the unrecognized objects anymore + # we use a nullcontext to handle these cases + warn_context = contextlib.nullcontext() + group = zarr.open_consolidated(store=store, zarr_format=zarr_format) + + with warn_context: + members_observed = group.members() + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(dict(members_observed)) == sorted(members_expected) + + # partial + with warn_context: + members_observed = group.members(max_depth=1) + members_expected["subgroup/subsubgroup"] = subsubgroup + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(dict(members_observed)) == sorted(members_expected) + + # total + with warn_context: + members_observed = group.members(max_depth=None) + members_expected["subgroup/subsubgroup/subsubsubgroup"] = subsubsubgroup + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(dict(members_observed)) == sorted(members_expected) + + with pytest.raises(ValueError, match="max_depth"): + members_observed = group.members(max_depth=-1) + + +def test_group(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test basic Group routines. + """ + store_path = StorePath(store) + agroup = AsyncGroup(metadata=GroupMetadata(zarr_format=zarr_format), store_path=store_path) + group = Group(agroup) + assert agroup.metadata is group.metadata + assert agroup.store_path == group.store_path == store_path + + # create two groups + foo = group.create_group("foo") + bar = foo.create_group("bar", attributes={"baz": "qux"}) + + # create an array from the "bar" group + data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) + arr = bar.create_array( + "baz", shape=data.shape, dtype=data.dtype, chunk_shape=(2, 2), exists_ok=True + ) + arr[:] = data + + # check the array + assert arr == bar["baz"] + assert arr.shape == data.shape + assert arr.dtype == data.dtype + + # TODO: update this once the array api settles down + assert arr.chunks == (2, 2) + + bar2 = foo["bar"] + assert dict(bar2.attrs) == {"baz": "qux"} + + # update a group's attributes + bar2.attrs.update({"name": "bar"}) + # bar.attrs was modified in-place + assert dict(bar2.attrs) == {"baz": "qux", "name": "bar"} + + # and the attrs were modified in the store + bar3 = foo["bar"] + assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} + + +def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> None: + """ + Test that `Group.from_store` works as expected. + """ + attributes = {"foo": 100} + group = Group.from_store( + store, attributes=attributes, zarr_format=zarr_format, exists_ok=exists_ok + ) + + assert group.attrs == attributes + + if not exists_ok: + with pytest.raises(ContainsGroupError): + _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) + + +def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: + """ + Test the `Group.open` method. 
+ """ + spath = StorePath(store) + # attempt to open a group that does not exist + with pytest.raises(FileNotFoundError): + Group.open(store) + + # create the group + attrs = {"path": "foo"} + group_created = Group.from_store( + store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok + ) + assert group_created.attrs == attrs + assert group_created.metadata.zarr_format == zarr_format + assert group_created.store_path == spath + + # attempt to create a new group in place, to test exists_ok + new_attrs = {"path": "bar"} + if not exists_ok: + with pytest.raises(ContainsGroupError): + Group.from_store(store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok) + else: + group_created_again = Group.from_store( + store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok + ) + assert group_created_again.attrs == new_attrs + assert group_created_again.metadata.zarr_format == zarr_format + assert group_created_again.store_path == spath + + +@pytest.mark.parametrize("consolidated", [True, False]) +def test_group_getitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: + """ + Test the `Group.__getitem__` method. + """ + + group = Group.from_store(store, zarr_format=zarr_format) + subgroup = group.create_group(name="subgroup") + subarray = group.create_array(name="subarray", shape=(10,), chunk_shape=(10,)) + + if consolidated: + group = zarr.api.synchronous.consolidate_metadata(store=store, zarr_format=zarr_format) + object.__setattr__( + subgroup.metadata, "consolidated_metadata", ConsolidatedMetadata(metadata={}) + ) + + assert group["subgroup"] == subgroup + assert group["subarray"] == subarray + with pytest.raises(KeyError): + group["nope"] + + +def test_group_get_with_default(store: Store, zarr_format: ZarrFormat) -> None: + group = Group.from_store(store, zarr_format=zarr_format) + + # default behavior + result = group.get("subgroup") + assert result is None + + # custom default + result = group.get("subgroup", 8) + assert result == 8 + + # now with a group + subgroup = group.require_group("subgroup") + subgroup.attrs["foo"] = "bar" + + result = group.get("subgroup", 8) + assert result.attrs["foo"] == "bar" + + +@pytest.mark.parametrize("consolidated", [True, False]) +def test_group_delitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: + """ + Test the `Group.__delitem__` method. + """ + if not store.supports_deletes: + pytest.skip("store does not support deletes") + + group = Group.from_store(store, zarr_format=zarr_format) + subgroup = group.create_group(name="subgroup") + subarray = group.create_array(name="subarray", shape=(10,), chunk_shape=(10,)) + + if consolidated: + group = zarr.api.synchronous.consolidate_metadata(store=store, zarr_format=zarr_format) + object.__setattr__( + subgroup.metadata, "consolidated_metadata", ConsolidatedMetadata(metadata={}) + ) + + assert group["subgroup"] == subgroup + assert group["subarray"] == subarray + + del group["subgroup"] + with pytest.raises(KeyError): + group["subgroup"] + + del group["subarray"] + with pytest.raises(KeyError): + group["subarray"] + + +def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test the `Group.__iter__` method. + """ + + group = Group.from_store(store, zarr_format=zarr_format) + assert list(group) == [] + + +def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test the `Group.__len__` method. 
+ """ + + group = Group.from_store(store, zarr_format=zarr_format) + assert len(group) == 0 + + +def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test the `Group.__setitem__` method. + """ + group = Group.from_store(store, zarr_format=zarr_format) + with pytest.raises(NotImplementedError): + group["key"] = 10 + + +def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test the `Group.__contains__` method + """ + group = Group.from_store(store, zarr_format=zarr_format) + assert "foo" not in group + _ = group.create_group(name="foo") + assert "foo" in group + + +@pytest.mark.parametrize("consolidate", [True, False]) +def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidate: bool): + group = Group.from_store(store, zarr_format=zarr_format) + expected_group_keys = ["g0", "g1"] + expected_group_values = [group.create_group(name=name) for name in expected_group_keys] + expected_groups = list(zip(expected_group_keys, expected_group_values, strict=False)) + + expected_group_values[0].create_group("subgroup") + expected_group_values[0].create_array("subarray", shape=(1,)) + + expected_array_keys = ["a0", "a1"] + expected_array_values = [ + group.create_array(name=name, shape=(1,)) for name in expected_array_keys + ] + expected_arrays = list(zip(expected_array_keys, expected_array_values, strict=False)) + fill_value: float | None + if zarr_format == 2: + fill_value = None + else: + fill_value = np.float64(0.0) + + if consolidate: + group = zarr.consolidate_metadata(store) + if zarr_format == 2: + metadata = { + "subarray": { + "attributes": {}, + "dtype": "float64", + "fill_value": fill_value, + "shape": (1,), + "chunks": (1,), + "order": "C", + "zarr_format": zarr_format, + }, + "subgroup": { + "attributes": {}, + "consolidated_metadata": { + "metadata": {}, + "kind": "inline", + "must_understand": False, + }, + "node_type": "group", + "zarr_format": zarr_format, + }, + } + else: + metadata = { + "subarray": { + "attributes": {}, + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", + }, + "chunk_key_encoding": { + "configuration": {"separator": "/"}, + "name": "default", + }, + "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},), + "data_type": "float64", + "fill_value": fill_value, + "node_type": "array", + "shape": (1,), + "zarr_format": zarr_format, + }, + "subgroup": { + "attributes": {}, + "consolidated_metadata": { + "metadata": {}, + "kind": "inline", + "must_understand": False, + }, + "node_type": "group", + "zarr_format": zarr_format, + }, + } + + object.__setattr__( + expected_group_values[0].metadata, + "consolidated_metadata", + ConsolidatedMetadata.from_dict( + { + "kind": "inline", + "metadata": metadata, + "must_understand": False, + } + ), + ) + object.__setattr__( + expected_group_values[1].metadata, + "consolidated_metadata", + ConsolidatedMetadata(metadata={}), + ) + + result = sorted(group.groups(), key=lambda x: x[0]) + assert result == expected_groups + + assert sorted(group.groups(), key=lambda x: x[0]) == expected_groups + assert sorted(group.group_keys()) == expected_group_keys + assert sorted(group.group_values(), key=lambda x: x.name) == expected_group_values + + assert sorted(group.arrays(), key=lambda x: x[0]) == expected_arrays + assert sorted(group.array_keys()) == expected_array_keys + assert sorted(group.array_values(), key=lambda x: x.name) == expected_array_values + + +def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) 
-> None: + """ + Test the behavior of `Group.update_attributes` + """ + attrs = {"foo": 100} + group = Group.from_store(store, zarr_format=zarr_format, attributes=attrs) + assert group.attrs == attrs + new_attrs = {"bar": 100} + new_group = group.update_attributes(new_attrs) + assert new_group.attrs == new_attrs + + +async def test_group_update_attributes_async(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test the behavior of `Group.update_attributes_async` + """ + attrs = {"foo": 100} + group = Group.from_store(store, zarr_format=zarr_format, attributes=attrs) + assert group.attrs == attrs + new_attrs = {"bar": 100} + new_group = await group.update_attributes_async(new_attrs) + assert new_group.attrs == new_attrs + + +@pytest.mark.parametrize("method", ["create_array", "array"]) +def test_group_create_array( + store: Store, + zarr_format: ZarrFormat, + exists_ok: bool, + method: Literal["create_array", "array"], +) -> None: + """ + Test `Group.from_store` + """ + group = Group.from_store(store, zarr_format=zarr_format) + shape = (10, 10) + dtype = "uint8" + data = np.arange(np.prod(shape)).reshape(shape).astype(dtype) + + if method == "create_array": + array = group.create_array(name="array", shape=shape, dtype=dtype, data=data) + elif method == "array": + with pytest.warns(DeprecationWarning): + array = group.array(name="array", shape=shape, dtype=dtype, data=data) + else: + raise AssertionError + + if not exists_ok: + if method == "create_array": + with pytest.raises(ContainsArrayError): + group.create_array(name="array", shape=shape, dtype=dtype, data=data) + elif method == "array": + with pytest.raises(ContainsArrayError), pytest.warns(DeprecationWarning): + group.array(name="array", shape=shape, dtype=dtype, data=data) + assert array.shape == shape + assert array.dtype == np.dtype(dtype) + assert np.array_equal(array[:], data) + + +def test_group_array_creation( + store: Store, + zarr_format: ZarrFormat, +): + group = Group.from_store(store, zarr_format=zarr_format) + shape = (10, 10) + empty_array = group.empty(name="empty", shape=shape) + assert isinstance(empty_array, Array) + assert empty_array.fill_value == 0 + assert empty_array.shape == shape + assert empty_array.store_path.store == store + + empty_like_array = group.empty_like(name="empty_like", data=empty_array) + assert isinstance(empty_like_array, Array) + assert empty_like_array.fill_value == 0 + assert empty_like_array.shape == shape + assert empty_like_array.store_path.store == store + + empty_array_bool = group.empty(name="empty_bool", shape=shape, dtype=np.dtype("bool")) + assert isinstance(empty_array_bool, Array) + assert not empty_array_bool.fill_value + assert empty_array_bool.shape == shape + assert empty_array_bool.store_path.store == store + + empty_like_array_bool = group.empty_like(name="empty_like_bool", data=empty_array_bool) + assert isinstance(empty_like_array_bool, Array) + assert not empty_like_array_bool.fill_value + assert empty_like_array_bool.shape == shape + assert empty_like_array_bool.store_path.store == store + + zeros_array = group.zeros(name="zeros", shape=shape) + assert isinstance(zeros_array, Array) + assert zeros_array.fill_value == 0 + assert zeros_array.shape == shape + assert zeros_array.store_path.store == store + + zeros_like_array = group.zeros_like(name="zeros_like", data=zeros_array) + assert isinstance(zeros_like_array, Array) + assert zeros_like_array.fill_value == 0 + assert zeros_like_array.shape == shape + assert zeros_like_array.store_path.store == store + + 
ones_array = group.ones(name="ones", shape=shape) + assert isinstance(ones_array, Array) + assert ones_array.fill_value == 1 + assert ones_array.shape == shape + assert ones_array.store_path.store == store + + ones_like_array = group.ones_like(name="ones_like", data=ones_array) + assert isinstance(ones_like_array, Array) + assert ones_like_array.fill_value == 1 + assert ones_like_array.shape == shape + assert ones_like_array.store_path.store == store + + full_array = group.full(name="full", shape=shape, fill_value=42) + assert isinstance(full_array, Array) + assert full_array.fill_value == 42 + assert full_array.shape == shape + assert full_array.store_path.store == store + + full_like_array = group.full_like(name="full_like", data=full_array, fill_value=43) + assert isinstance(full_like_array, Array) + assert full_like_array.fill_value == 43 + assert full_like_array.shape == shape + assert full_like_array.store_path.store == store + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("exists_ok", [True, False]) +@pytest.mark.parametrize("extant_node", ["array", "group"]) +def test_group_creation_existing_node( + store: Store, + zarr_format: ZarrFormat, + exists_ok: bool, + extant_node: Literal["array", "group"], +) -> None: + """ + Check that an existing array or group is handled as expected during group creation. + """ + spath = StorePath(store) + group = Group.from_store(spath, zarr_format=zarr_format) + expected_exception: type[ContainsArrayError | ContainsGroupError] + attributes: dict[str, JSON] = {"old": True} + + if extant_node == "array": + expected_exception = ContainsArrayError + _ = group.create_array("extant", shape=(10,), dtype="uint8", attributes=attributes) + elif extant_node == "group": + expected_exception = ContainsGroupError + _ = group.create_group("extant", attributes=attributes) + else: + raise AssertionError + + new_attributes = {"new": True} + + if exists_ok: + node_new = Group.from_store( + spath / "extant", + attributes=new_attributes, + zarr_format=zarr_format, + exists_ok=exists_ok, + ) + assert node_new.attrs == new_attributes + else: + with pytest.raises(expected_exception): + node_new = Group.from_store( + spath / "extant", + attributes=new_attributes, + zarr_format=zarr_format, + exists_ok=exists_ok, + ) + + +async def test_asyncgroup_create( + store: Store, + exists_ok: bool, + zarr_format: ZarrFormat, +) -> None: + """ + Test that `AsyncGroup.from_store` works as expected. 
+ """ + spath = StorePath(store=store) + attributes = {"foo": 100} + agroup = await AsyncGroup.from_store( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + ) + + assert agroup.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert agroup.store_path == await make_store_path(store) + + if not exists_ok: + with pytest.raises(ContainsGroupError): + agroup = await AsyncGroup.from_store( + spath, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + ) + # create an array at our target path + collision_name = "foo" + _ = await AsyncArray.create( + spath / collision_name, shape=(10,), dtype="uint8", zarr_format=zarr_format + ) + with pytest.raises(ContainsArrayError): + _ = await AsyncGroup.from_store( + StorePath(store=store) / collision_name, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + ) + + +async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: + attributes = {"foo": 100} + agroup = await AsyncGroup.from_store(store, zarr_format=zarr_format, attributes=attributes) + + assert agroup.attrs == agroup.metadata.attributes == attributes + + +async def test_asyncgroup_info(store: Store, zarr_format: ZarrFormat) -> None: + agroup = await AsyncGroup.from_store( # noqa: F841 + store, + zarr_format=zarr_format, + ) + pytest.xfail("Info is not implemented for metadata yet") + # assert agroup.info == agroup.metadata.info + + +async def test_asyncgroup_open( + store: Store, + zarr_format: ZarrFormat, +) -> None: + """ + Create an `AsyncGroup`, then ensure that we can open it using `AsyncGroup.open` + """ + attributes = {"foo": 100} + group_w = await AsyncGroup.from_store( + store=store, + attributes=attributes, + exists_ok=False, + zarr_format=zarr_format, + ) + + group_r = await AsyncGroup.open(store=store, zarr_format=zarr_format) + + assert group_w.attrs == group_w.attrs == attributes + assert group_w == group_r + + +async def test_asyncgroup_open_wrong_format( + store: Store, + zarr_format: ZarrFormat, +) -> None: + _ = await AsyncGroup.from_store(store=store, exists_ok=False, zarr_format=zarr_format) + zarr_format_wrong: ZarrFormat + # try opening with the wrong zarr format + if zarr_format == 3: + zarr_format_wrong = 2 + elif zarr_format == 2: + zarr_format_wrong = 3 + else: + raise AssertionError + + with pytest.raises(FileNotFoundError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + + +# todo: replace the dict[str, Any] type with something a bit more specific +# should this be async? +@pytest.mark.parametrize( + "data", + [ + {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, + {"zarr_format": 2, "attributes": {"foo": 100}}, + ], +) +def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: + """ + Test that we can create an AsyncGroup from a dict + """ + path = "test" + store_path = StorePath(store=store, path=path) + group = AsyncGroup.from_dict(store_path, data=data) + + assert group.metadata.zarr_format == data["zarr_format"] + assert group.metadata.attributes == data["attributes"] + + +# todo: replace this with a declarative API where we model a full hierarchy + + +async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None: + """ + Create an `AsyncGroup`, then create members of that group, and ensure that we can access those + members via the `AsyncGroup.getitem` method. 
+ """ + agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + + array_name = "sub_array" + sub_array = await agroup.create_array( + name=array_name, shape=(10,), dtype="uint8", chunk_shape=(2,) + ) + assert await agroup.getitem(array_name) == sub_array + + sub_group_path = "sub_group" + sub_group = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + assert await agroup.getitem(sub_group_path) == sub_group + + # check that asking for a nonexistent key raises KeyError + with pytest.raises(KeyError): + await agroup.getitem("foo") + + +async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + + agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + array_name = "sub_array" + _ = await agroup.create_array( + name=array_name, + shape=(10,), + dtype="uint8", + chunk_shape=(2,), + attributes={"foo": 100}, + ) + await agroup.delitem(array_name) + + # todo: clean up the code duplication here + if zarr_format == 2: + assert not await agroup.store_path.store.exists(array_name + "/" + ".zarray") + assert not await agroup.store_path.store.exists(array_name + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(array_name + "/" + "zarr.json") + else: + raise AssertionError + + sub_group_path = "sub_group" + _ = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + await agroup.delitem(sub_group_path) + if zarr_format == 2: + assert not await agroup.store_path.store.exists(array_name + "/" + ".zgroup") + assert not await agroup.store_path.store.exists(array_name + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(array_name + "/" + "zarr.json") + else: + raise AssertionError + + +async def test_asyncgroup_create_group( + store: Store, + zarr_format: ZarrFormat, +) -> None: + agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + sub_node_path = "sub_group" + attributes = {"foo": 999} + subnode = await agroup.create_group(name=sub_node_path, attributes=attributes) + + assert isinstance(subnode, AsyncGroup) + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.metadata.zarr_format == zarr_format + + +async def test_asyncgroup_create_array( + store: Store, zarr_format: ZarrFormat, exists_ok: bool +) -> None: + """ + Test that the AsyncGroup.create_array method works correctly. We ensure that array properties + specified in create_array are present on the resulting array. + """ + + agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + + if not exists_ok: + with pytest.raises(ContainsGroupError): + agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + + shape = (10,) + dtype = "uint8" + chunk_shape = (4,) + attributes: dict[str, JSON] = {"foo": 100} + + sub_node_path = "sub_array" + subnode = await agroup.create_array( + name=sub_node_path, + shape=shape, + dtype=dtype, + chunk_shape=chunk_shape, + attributes=attributes, + ) + assert isinstance(subnode, AsyncArray) + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.shape == shape + assert subnode.dtype == dtype + # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete + # here. 
+ assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape + assert subnode.metadata.zarr_format == zarr_format + + +async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: + """ + Test that the AsyncGroup.update_attributes method works correctly. + """ + attributes_old = {"foo": 10} + attributes_new = {"baz": "new"} + agroup = await AsyncGroup.from_store( + store=store, zarr_format=zarr_format, attributes=attributes_old + ) + + agroup_new_attributes = await agroup.update_attributes(attributes_new) + assert agroup_new_attributes.attrs == attributes_new + + +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrFormat) -> None: + expected = await AsyncGroup.from_store( + store=store, attributes={"foo": 999}, zarr_format=zarr_format + ) + p = pickle.dumps(expected) + actual = pickle.loads(p) + assert actual == expected + + +@pytest.mark.parametrize("store", ["local"], indirect=["store"]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_serializable_sync_group(store: LocalStore, zarr_format: ZarrFormat) -> None: + expected = Group.from_store(store=store, attributes={"foo": 999}, zarr_format=zarr_format) + p = pickle.dumps(expected) + actual = pickle.loads(p) + assert actual == expected + + +@pytest.mark.parametrize("consolidated_metadata", [True, False]) +async def test_group_members_async(store: Store, consolidated_metadata: bool) -> None: + group = await AsyncGroup.from_store( + store=store, + ) + a0 = await group.create_array("a0", shape=(1,)) + g0 = await group.create_group("g0") + a1 = await g0.create_array("a1", shape=(1,)) + g1 = await g0.create_group("g1") + a2 = await g1.create_array("a2", shape=(1,)) + g2 = await g1.create_group("g2") + + # immediate children + children = sorted([x async for x in group.members()], key=lambda x: x[0]) + assert children == [ + ("a0", a0), + ("g0", g0), + ] + + nmembers = await group.nmembers() + assert nmembers == 2 + + # partial + children = sorted([x async for x in group.members(max_depth=1)], key=lambda x: x[0]) + expected = [ + ("a0", a0), + ("g0", g0), + ("g0/a1", a1), + ("g0/g1", g1), + ] + assert children == expected + nmembers = await group.nmembers(max_depth=1) + assert nmembers == 4 + + # all children + all_children = sorted([x async for x in group.members(max_depth=None)], key=lambda x: x[0]) + expected = [ + ("a0", a0), + ("g0", g0), + ("g0/a1", a1), + ("g0/g1", g1), + ("g0/g1/a2", a2), + ("g0/g1/g2", g2), + ] + assert all_children == expected + + if consolidated_metadata: + await zarr.api.asynchronous.consolidate_metadata(store=store) + group = await zarr.api.asynchronous.open_group(store=store) + + nmembers = await group.nmembers(max_depth=None) + assert nmembers == 6 + + with pytest.raises(ValueError, match="max_depth"): + [x async for x in group.members(max_depth=-1)] + + +async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + + # create foo group + _ = await root.create_group("foo", attributes={"foo": 100}) + + # test that we can get the group using require_group + foo_group = await root.require_group("foo") + assert foo_group.attrs == {"foo": 100} + + # test that we can get the group using require_group and overwrite=True + foo_group = await root.require_group("foo", overwrite=True) + + _ = await foo_group.create_array( + "bar", 
shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} + ) + + # test that overwriting a group w/ children fails + # TODO: figure out why ensure_no_existing_node is not catching the foo.bar array + # + # with pytest.raises(ContainsArrayError): + # await root.require_group("foo", overwrite=True) + + # test that requiring a group where an array is fails + with pytest.raises(TypeError): + await foo_group.require_group("bar") + + +async def test_require_groups(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + # create foo group + _ = await root.create_group("foo", attributes={"foo": 100}) + # create bar group + _ = await root.create_group("bar", attributes={"bar": 200}) + + foo_group, bar_group = await root.require_groups("foo", "bar") + assert foo_group.attrs == {"foo": 100} + assert bar_group.attrs == {"bar": 200} + + # get a mix of existing and new groups + foo_group, spam_group = await root.require_groups("foo", "spam") + assert foo_group.attrs == {"foo": 100} + assert spam_group.attrs == {} + + # no names + no_group = await root.require_groups() + assert no_group == () + + +async def test_create_dataset(store: Store, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + with pytest.warns(DeprecationWarning): + foo = await root.create_dataset("foo", shape=(10,), dtype="uint8") + assert foo.shape == (10,) + + with pytest.raises(ContainsArrayError), pytest.warns(DeprecationWarning): + await root.create_dataset("foo", shape=(100,), dtype="int8") + + _ = await root.create_group("bar") + with pytest.raises(ContainsGroupError), pytest.warns(DeprecationWarning): + await root.create_dataset("bar", shape=(100,), dtype="int8") + + +async def test_require_array(store: Store, zarr_format: ZarrFormat) -> None: + root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) + foo1 = await root.require_array("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) + assert foo1.attrs == {"foo": 101} + foo2 = await root.require_array("foo", shape=(10,), dtype="i8") + assert foo2.attrs == {"foo": 101} + + # exact = False + _ = await root.require_array("foo", shape=10, dtype="f8") + + # errors w/ exact True + with pytest.raises(TypeError, match="Incompatible dtype"): + await root.require_array("foo", shape=(10,), dtype="f8", exact=True) + + with pytest.raises(TypeError, match="Incompatible shape"): + await root.require_array("foo", shape=(100, 100), dtype="i8") + + with pytest.raises(TypeError, match="Incompatible dtype"): + await root.require_array("foo", shape=(10,), dtype="f4") + + _ = await root.create_group("bar") + with pytest.raises(TypeError, match="Incompatible object"): + await root.require_array("bar", shape=(10,), dtype="int8") + + +@pytest.mark.parametrize("consolidate", [True, False]) +async def test_members_name(store: Store, consolidate: bool, zarr_format: ZarrFormat): + group = Group.from_store(store=store, zarr_format=zarr_format) + a = group.create_group(name="a") + a.create_array("array", shape=(1,)) + b = a.create_group(name="b") + b.create_array("array", shape=(1,)) + + if consolidate: + group = zarr.api.synchronous.consolidate_metadata(store) + + result = group["a"]["b"] + assert result.name == "/a/b" + + paths = sorted(x.name for _, x in group.members(max_depth=None)) + expected = ["/a", "/a/array", "/a/b", "/a/b/array"] + assert paths == expected + + # regression test for 
https://github.com/zarr-developers/zarr-python/pull/2356 + g = zarr.open_group(store, use_consolidated=False) + with warnings.catch_warnings(): + warnings.simplefilter("error") + assert list(g) + + +async def test_open_mutable_mapping(): + group = await zarr.api.asynchronous.open_group(store={}, mode="w") + assert isinstance(group.store_path.store, MemoryStore) + + +def test_open_mutable_mapping_sync(): + group = zarr.open_group(store={}, mode="w") + assert isinstance(group.store_path.store, MemoryStore) + + +class TestConsolidated: + async def test_group_getitem_consolidated(self, store: Store) -> None: + root = await AsyncGroup.from_store(store=store) + # Set up the test structure with + # / + # g0/ # group /g0 + # g1/ # group /g0/g1 + # g2/ # group /g0/g1/g2 + # x1/ # group /x0 + # x2/ # group /x0/x1 + # x3/ # group /x0/x1/x2 + + g0 = await root.create_group("g0") + g1 = await g0.create_group("g1") + await g1.create_group("g2") + + x0 = await root.create_group("x0") + x1 = await x0.create_group("x1") + await x1.create_group("x2") + + await zarr.api.asynchronous.consolidate_metadata(store) + + # On disk, we've consolidated all the metadata in the root zarr.json + group = await zarr.api.asynchronous.open(store=store) + rg0 = await group.getitem("g0") + + expected = ConsolidatedMetadata( + metadata={ + "g1": GroupMetadata( + attributes={}, + zarr_format=3, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "g2": GroupMetadata( + attributes={}, + zarr_format=3, + consolidated_metadata=ConsolidatedMetadata(metadata={}), + ) + } + ), + ), + } + ) + assert rg0.metadata.consolidated_metadata == expected + + rg1 = await rg0.getitem("g1") + assert rg1.metadata.consolidated_metadata == expected.metadata["g1"].consolidated_metadata + + rg2 = await rg1.getitem("g2") + assert rg2.metadata.consolidated_metadata == ConsolidatedMetadata(metadata={}) + + async def test_group_delitem_consolidated(self, store: Store) -> None: + if isinstance(store, ZipStore): + raise pytest.skip("Not implemented") + + root = await AsyncGroup.from_store(store=store) + # Set up the test structure with + # / + # g0/ # group /g0 + # g1/ # group /g0/g1 + # g2/ # group /g0/g1/g2 + # data # array + # x1/ # group /x0 + # x2/ # group /x0/x1 + # x3/ # group /x0/x1/x2 + # data # array + + g0 = await root.create_group("g0") + g1 = await g0.create_group("g1") + g2 = await g1.create_group("g2") + await g2.create_array("data", shape=(1,)) + + x0 = await root.create_group("x0") + x1 = await x0.create_group("x1") + x2 = await x1.create_group("x2") + await x2.create_array("data", shape=(1,)) + + await zarr.api.asynchronous.consolidate_metadata(store) + + group = await zarr.api.asynchronous.open_consolidated(store=store) + assert len(group.metadata.consolidated_metadata.metadata) == 2 + assert "g0" in group.metadata.consolidated_metadata.metadata + + await group.delitem("g0") + assert len(group.metadata.consolidated_metadata.metadata) == 1 + assert "g0" not in group.metadata.consolidated_metadata.metadata + + def test_open_consolidated_raises(self, store: Store) -> None: + if isinstance(store, ZipStore): + raise pytest.skip("Not implemented") + + root = Group.from_store(store=store) + + # fine to be missing by default + zarr.open_group(store=store) + + with pytest.raises(ValueError, match="Consolidated metadata requested."): + zarr.open_group(store=store, use_consolidated=True) + + # Now create consolidated metadata... + root.create_group("g0") + zarr.consolidate_metadata(store) + + # and explicitly ignore it. 
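+        # with use_consolidated=False, open_group reads each node's own metadata document
+        # and leaves group.metadata.consolidated_metadata unset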
+ group = zarr.open_group(store=store, use_consolidated=False) + assert group.metadata.consolidated_metadata is None + + async def test_open_consolidated_raises_async(self, store: Store) -> None: + if isinstance(store, ZipStore): + raise pytest.skip("Not implemented") + + root = await AsyncGroup.from_store(store=store) + + # fine to be missing by default + await zarr.api.asynchronous.open_group(store=store) + + with pytest.raises(ValueError, match="Consolidated metadata requested."): + await zarr.api.asynchronous.open_group(store=store, use_consolidated=True) + + # Now create consolidated metadata... + await root.create_group("g0") + await zarr.api.asynchronous.consolidate_metadata(store) + + # and explicitly ignore it. + group = await zarr.api.asynchronous.open_group(store=store, use_consolidated=False) + assert group.metadata.consolidated_metadata is None + + +class TestGroupMetadata: + def test_from_dict_extra_fields(self): + data = { + "attributes": {"key": "value"}, + "_nczarr_superblock": {"version": "2.0.0"}, + "zarr_format": 2, + } + result = GroupMetadata.from_dict(data) + expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2) + assert result == expected + + +def test_update_attrs() -> None: + # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 + root = Group.from_store( + MemoryStore({}, mode="w"), + ) + root.attrs["foo"] = "bar" + assert root.attrs["foo"] == "bar" diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py new file mode 100644 index 0000000000..0ea9cda39d --- /dev/null +++ b/tests/v3/test_indexing.py @@ -0,0 +1,1929 @@ +from __future__ import annotations + +import itertools +from collections import Counter +from typing import TYPE_CHECKING, Any +from uuid import uuid4 + +import numpy as np +import numpy.typing as npt +import pytest +from numpy.testing import assert_array_equal + +import zarr +from zarr.core.buffer import BufferPrototype, default_buffer_prototype +from zarr.core.indexing import ( + BasicSelection, + CoordinateSelection, + OrthogonalSelection, + Selection, + _iter_grid, + make_slice_selection, + normalize_integer_selection, + oindex, + oindex_set, + replace_ellipsis, +) +from zarr.registry import get_ndbuffer_class +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator + + from zarr.core.array import Array + from zarr.core.buffer.core import Buffer + from zarr.core.common import ChunkCoords + + +@pytest.fixture +async def store() -> AsyncGenerator[StorePath]: + return StorePath(await MemoryStore.open(mode="w")) + + +def zarr_array_from_numpy_array( + store: StorePath, + a: npt.NDArray[Any], + chunk_shape: ChunkCoords | None = None, +) -> zarr.Array: + z = zarr.Array.create( + store=store / str(uuid4()), + shape=a.shape, + dtype=a.dtype, + chunk_shape=chunk_shape or a.shape, + chunk_key_encoding=("v2", "."), + ) + z[()] = a + return z + + +class CountingDict(MemoryStore): + counter: Counter[tuple[str, str]] + + @classmethod + async def open(cls) -> CountingDict: + store = await super().open(mode="w") + store.counter = Counter() + return store + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + key_suffix = "/".join(key.split("/")[1:]) + self.counter["__getitem__", key_suffix] += 1 + return await super().get(key, prototype, byte_range) + + async def set(self, key: str, value: Buffer, byte_range: 
tuple[int, int] | None = None) -> None: + key_suffix = "/".join(key.split("/")[1:]) + self.counter["__setitem__", key_suffix] += 1 + return await super().set(key, value, byte_range) + + +def test_normalize_integer_selection() -> None: + assert 1 == normalize_integer_selection(1, 100) + assert 99 == normalize_integer_selection(-1, 100) + with pytest.raises(IndexError): + normalize_integer_selection(100, 100) + with pytest.raises(IndexError): + normalize_integer_selection(1000, 100) + with pytest.raises(IndexError): + normalize_integer_selection(-1000, 100) + + +def test_replace_ellipsis() -> None: + # 1D, single item + assert (0,) == replace_ellipsis(0, (100,)) + + # 1D + assert (slice(None),) == replace_ellipsis(Ellipsis, (100,)) + assert (slice(None),) == replace_ellipsis(slice(None), (100,)) + assert (slice(None, 100),) == replace_ellipsis(slice(None, 100), (100,)) + assert (slice(0, None),) == replace_ellipsis(slice(0, None), (100,)) + assert (slice(None),) == replace_ellipsis((slice(None), Ellipsis), (100,)) + assert (slice(None),) == replace_ellipsis((Ellipsis, slice(None)), (100,)) + + # 2D, single item + assert (0, 0) == replace_ellipsis((0, 0), (100, 100)) + assert (-1, 1) == replace_ellipsis((-1, 1), (100, 100)) + + # 2D, single col/row + assert (0, slice(None)) == replace_ellipsis((0, slice(None)), (100, 100)) + assert (0, slice(None)) == replace_ellipsis((0,), (100, 100)) + assert (slice(None), 0) == replace_ellipsis((slice(None), 0), (100, 100)) + + # 2D slice + assert (slice(None), slice(None)) == replace_ellipsis(Ellipsis, (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis(slice(None), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((Ellipsis, slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), Ellipsis), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), Ellipsis, slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (Ellipsis, slice(None), slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), slice(None), Ellipsis), (100, 100) + ) + + +@pytest.mark.parametrize( + ("value", "dtype"), + [ + (42, "uint8"), + pytest.param( + (b"aaa", 1, 4.2), [("foo", "S3"), ("bar", "i4"), ("baz", "f8")], marks=pytest.mark.xfail + ), + ], +) +@pytest.mark.parametrize("use_out", [True, False]) +def test_get_basic_selection_0d(store: StorePath, use_out: bool, value: Any, dtype: Any) -> None: + # setup + arr_np = np.array(value, dtype=dtype) + arr_z = zarr_array_from_numpy_array(store, arr_np) + + assert_array_equal(arr_np, arr_z.get_basic_selection(Ellipsis)) + assert_array_equal(arr_np, arr_z[...]) + assert value == arr_z.get_basic_selection(()) + assert value == arr_z[()] + + if use_out: + # test out param + b = default_buffer_prototype().nd_buffer.from_numpy_array(np.zeros_like(arr_np)) + arr_z.get_basic_selection(Ellipsis, out=b) + assert_array_equal(arr_np, b.as_ndarray_like()) + + # todo: uncomment the structured array tests when we can make them pass, + # or delete them if we formally decide not to support structured dtypes. 
+ + # test structured array + # value = (b"aaa", 1, 4.2) + # a = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) + # z = zarr_array_from_numpy_array(store, a) + # z[()] = value + # assert_array_equal(a, z.get_basic_selection(Ellipsis)) + # assert_array_equal(a, z[...]) + # assert a[()] == z.get_basic_selection(()) + # assert a[()] == z[()] + # assert b"aaa" == z.get_basic_selection((), fields="foo") + # assert b"aaa" == z["foo"] + # assert a[["foo", "bar"]] == z.get_basic_selection((), fields=["foo", "bar"]) + # assert a[["foo", "bar"]] == z["foo", "bar"] + # # test out param + # b = NDBuffer.from_numpy_array(np.zeros_like(a)) + # z.get_basic_selection(Ellipsis, out=b) + # assert_array_equal(a, b) + # c = NDBuffer.from_numpy_array(np.zeros_like(a[["foo", "bar"]])) + # z.get_basic_selection(Ellipsis, out=c, fields=["foo", "bar"]) + # assert_array_equal(a[["foo", "bar"]], c) + + +basic_selections_1d: list[BasicSelection] = [ + # single value + 42, + -1, + # slices + slice(0, 1050), + slice(50, 150), + slice(0, 2000), + slice(-150, -50), + slice(-2000, 2000), + slice(0, 0), # empty result + slice(-1, 0), # empty result + # total selections + slice(None), + Ellipsis, + (), + (Ellipsis, slice(None)), + # slice with step + slice(None), + slice(None, None), + slice(None, None, 1), + slice(None, None, 10), + slice(None, None, 100), + slice(None, None, 1000), + slice(None, None, 10000), + slice(0, 1050), + slice(0, 1050, 1), + slice(0, 1050, 10), + slice(0, 1050, 100), + slice(0, 1050, 1000), + slice(0, 1050, 10000), + slice(1, 31, 3), + slice(1, 31, 30), + slice(1, 31, 300), + slice(81, 121, 3), + slice(81, 121, 30), + slice(81, 121, 300), + slice(50, 150), + slice(50, 150, 1), + slice(50, 150, 10), +] + +basic_selections_1d_bad = [ + # only positive step supported + slice(None, None, -1), + slice(None, None, -10), + slice(None, None, -100), + slice(None, None, -1000), + slice(None, None, -10000), + slice(1050, -1, -1), + slice(1050, -1, -10), + slice(1050, -1, -100), + slice(1050, -1, -1000), + slice(1050, -1, -10000), + slice(1050, 0, -1), + slice(1050, 0, -10), + slice(1050, 0, -100), + slice(1050, 0, -1000), + slice(1050, 0, -10000), + slice(150, 50, -1), + slice(150, 50, -10), + slice(31, 1, -3), + slice(121, 81, -3), + slice(-1, 0, -1), + # bad stuff + 2.3, + "foo", + b"xxx", + None, + (0, 0), + (slice(None), slice(None)), +] + + +def _test_get_basic_selection( + a: npt.NDArray[Any] | Array, z: Array, selection: BasicSelection +) -> None: + expect = a[selection] + actual = z.get_basic_selection(selection) + assert_array_equal(expect, actual) + actual = z[selection] + assert_array_equal(expect, actual) + + # test out param + b = default_buffer_prototype().nd_buffer.from_numpy_array( + np.empty(shape=expect.shape, dtype=expect.dtype) + ) + z.get_basic_selection(selection, out=b) + assert_array_equal(expect, b.as_numpy_array()) + + +# noinspection PyStatementEffect +def test_get_basic_selection_1d(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + for selection in basic_selections_1d: + _test_get_basic_selection(a, z, selection) + + for selection_bad in basic_selections_1d_bad: + with pytest.raises(IndexError): + z.get_basic_selection(selection_bad) # type: ignore[arg-type] + with pytest.raises(IndexError): + z[selection_bad] # type: ignore[index] + + with pytest.raises(IndexError): + z.get_basic_selection([1, 0]) # type: ignore[arg-type] + + +basic_selections_2d: list[BasicSelection] = [ 
+ # single row + 42, + -1, + (42, slice(None)), + (-1, slice(None)), + # single col + (slice(None), 4), + (slice(None), -1), + # row slices + slice(None), + slice(0, 1000), + slice(250, 350), + slice(0, 2000), + slice(-350, -250), + slice(0, 0), # empty result + slice(-1, 0), # empty result + slice(-2000, 0), + slice(-2000, 2000), + # 2D slices + (slice(None), slice(1, 5)), + (slice(250, 350), slice(None)), + (slice(250, 350), slice(1, 5)), + (slice(250, 350), slice(-5, -1)), + (slice(250, 350), slice(-50, 50)), + (slice(250, 350, 10), slice(1, 5)), + (slice(250, 350), slice(1, 5, 2)), + (slice(250, 350, 33), slice(1, 5, 3)), + # total selections + (slice(None), slice(None)), + Ellipsis, + (), + (Ellipsis, slice(None)), + (Ellipsis, slice(None), slice(None)), +] + +basic_selections_2d_bad = [ + # bad stuff + 2.3, + "foo", + b"xxx", + None, + (2.3, slice(None)), + # only positive step supported + slice(None, None, -1), + (slice(None, None, -1), slice(None)), + (0, 0, 0), + (slice(None), slice(None), slice(None)), +] + + +# noinspection PyStatementEffect +def test_get_basic_selection_2d(store: StorePath) -> None: + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + for selection in basic_selections_2d: + _test_get_basic_selection(a, z, selection) + + bad_selections = basic_selections_2d_bad + [ + # integer arrays + [0, 1], + (slice(None), [0, 1]), + ] + for selection_bad in bad_selections: + with pytest.raises(IndexError): + z.get_basic_selection(selection_bad) # type: ignore[arg-type] + # check fallback on fancy indexing + fancy_selection = ([0, 1], [0, 1]) + np.testing.assert_array_equal(z[fancy_selection], [0, 11]) + + +def test_fancy_indexing_fallback_on_get_setitem(store: StorePath) -> None: + z = zarr_array_from_numpy_array(store, np.zeros((20, 20))) + z[[1, 2, 3], [1, 2, 3]] = 1 + np.testing.assert_array_equal( + z[:4, :4], + [ + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + ], + ) + np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) + # test broadcasting + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) + # test 1D fancy indexing + z2 = zarr_array_from_numpy_array(store, np.zeros(5)) + z2[[1, 2, 3]] = 1 + np.testing.assert_array_equal(z2[:], [0, 1, 1, 1, 0]) + + +@pytest.mark.parametrize( + ("index", "expected_result"), + [ + # Single iterable of integers + ([0, 1], [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(None)), [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(1, None)), [[1, 2], [4, 5]]), + # Slice first, then list + ((slice(0, 2), [0, 2]), [[0, 2], [3, 5]]), + # Slices only + ((slice(0, 2), slice(0, 2)), [[0, 1], [3, 4]]), + # List with repeated index + (([1, 0, 1], slice(1, None)), [[4, 5], [1, 2], [4, 5]]), + # 1D indexing + (([1, 0, 1]), [[3, 4, 5], [0, 1, 2], [3, 4, 5]]), + ], +) +def test_orthogonal_indexing_fallback_on_getitem_2d( + store: StorePath, index: Selection, expected_result: npt.ArrayLike +) -> None: + """ + Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. 
+ """ + # [0, 1, 2], + # [3, 4, 5], + # [6, 7, 8] + a = np.arange(9).reshape(3, 3) + z = zarr_array_from_numpy_array(store, a) + + np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") + np.testing.assert_array_equal(z[index], expected_result) + + +Index = list[int] | tuple[slice | int | list[int], ...] + + +@pytest.mark.parametrize( + ("index", "expected_result"), + [ + # Single iterable of integers + ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), + # One slice, two integers + ((slice(0, 2), 1, 1), [4, 13]), + # One integer, two slices + ((slice(0, 2), 1, slice(0, 2)), [[3, 4], [12, 13]]), + # Two slices and a list + ((slice(0, 2), [1, 2], slice(0, 2)), [[[3, 4], [6, 7]], [[12, 13], [15, 16]]]), + ], +) +def test_orthogonal_indexing_fallback_on_getitem_3d( + store: StorePath, index: Selection, expected_result: npt.ArrayLike +) -> None: + """ + Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. + """ + # [[[ 0, 1, 2], + # [ 3, 4, 5], + # [ 6, 7, 8]], + + # [[ 9, 10, 11], + # [12, 13, 14], + # [15, 16, 17]], + + # [[18, 19, 20], + # [21, 22, 23], + # [24, 25, 26]]] + a = np.arange(27).reshape(3, 3, 3) + z = zarr_array_from_numpy_array(store, a) + + np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") + np.testing.assert_array_equal(z[index], expected_result) + + +@pytest.mark.parametrize( + ("index", "expected_result"), + [ + # Single iterable of integers + ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), + # List and slice combined + (([0, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), + # Index repetition is ignored on setitem + (([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), + # Slice with step + (([0, 2], slice(None, None, 2)), [[1, 0, 1], [0, 0, 0], [1, 0, 1]]), + ], +) +def test_orthogonal_indexing_fallback_on_setitem_2d( + store: StorePath, index: Selection, expected_result: npt.ArrayLike +) -> None: + """ + Tests the orthogonal indexing fallback on __setitem__ for a 3D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. 
+ """ + # Slice + fancy index + a = np.zeros((3, 3)) + z = zarr_array_from_numpy_array(store, a) + z[index] = 1 + a[index] = 1 + np.testing.assert_array_equal(z[:], expected_result) + np.testing.assert_array_equal(z[:], a, err_msg="Indexing disagrees with numpy") + + +def test_fancy_indexing_doesnt_mix_with_implicit_slicing(store: StorePath) -> None: + z2 = zarr_array_from_numpy_array(store, np.zeros((5, 5, 5))) + with pytest.raises(IndexError): + z2[[1, 2, 3], [1, 2, 3]] = 2 + with pytest.raises(IndexError): + np.testing.assert_array_equal(z2[[1, 2, 3], [1, 2, 3]], 0) + with pytest.raises(IndexError): + z2[..., [1, 2, 3]] = 2 # type: ignore[index] + with pytest.raises(IndexError): + np.testing.assert_array_equal(z2[..., [1, 2, 3]], 0) # type: ignore[index] + + +@pytest.mark.parametrize( + ("value", "dtype"), + [ + (42, "uint8"), + pytest.param( + (b"aaa", 1, 4.2), [("foo", "S3"), ("bar", "i4"), ("baz", "f8")], marks=pytest.mark.xfail + ), + ], +) +def test_set_basic_selection_0d( + store: StorePath, value: Any, dtype: str | list[tuple[str, str]] +) -> None: + arr_np = np.array(value, dtype=dtype) + arr_np_zeros = np.zeros_like(arr_np, dtype=dtype) + arr_z = zarr_array_from_numpy_array(store, arr_np_zeros) + assert_array_equal(arr_np_zeros, arr_z) + + arr_z.set_basic_selection(Ellipsis, value) + assert_array_equal(value, arr_z) + arr_z[...] = 0 + assert_array_equal(arr_np_zeros, arr_z) + arr_z[...] = value + assert_array_equal(value, arr_z) + + # todo: uncomment the structured array tests when we can make them pass, + # or delete them if we formally decide not to support structured dtypes. + + # arr_z.set_basic_selection(Ellipsis, v["foo"], fields="foo") + # assert v["foo"] == arr_z["foo"] + # assert arr_np_zeros["bar"] == arr_z["bar"] + # assert arr_np_zeros["baz"] == arr_z["baz"] + # arr_z["bar"] = v["bar"] + # assert v["foo"] == arr_z["foo"] + # assert v["bar"] == arr_z["bar"] + # assert arr_np_zeros["baz"] == arr_z["baz"] + # # multiple field assignment not supported + # with pytest.raises(IndexError): + # arr_z.set_basic_selection(Ellipsis, v[["foo", "bar"]], fields=["foo", "bar"]) + # with pytest.raises(IndexError): + # arr_z[..., "foo", "bar"] = v[["foo", "bar"]] + + +def _test_get_orthogonal_selection( + a: npt.NDArray[Any], z: Array, selection: OrthogonalSelection +) -> None: + expect = oindex(a, selection) + actual = z.get_orthogonal_selection(selection) + assert_array_equal(expect, actual) + actual = z.oindex[selection] + assert_array_equal(expect, actual) + + +# noinspection PyStatementEffect +def test_get_orthogonal_selection_1d_bool(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + _test_get_orthogonal_selection(a, z, ix) + + # test errors + with pytest.raises(IndexError): + z.oindex[np.zeros(50, dtype=bool)] # too short + with pytest.raises(IndexError): + z.oindex[np.zeros(2000, dtype=bool)] # too long + with pytest.raises(IndexError): + # too many dimensions + z.oindex[[[True, False], [False, True]]] # type: ignore[index] + + +# noinspection PyStatementEffect +def test_get_orthogonal_selection_1d_int(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 2, 0.5, 0.1, 0.01: + # 
unordered + ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + _test_get_orthogonal_selection(a, z, ix) + # increasing + ix.sort() + _test_get_orthogonal_selection(a, z, ix) + # decreasing + ix = ix[::-1] + _test_get_orthogonal_selection(a, z, ix) + + selections = basic_selections_1d + [ + # test wraparound + [0, 3, 10, -23, -12, -1], + # explicit test not sorted + [3, 105, 23, 127], + ] + for selection in selections: + _test_get_orthogonal_selection(a, z, selection) + + bad_selections = basic_selections_1d_bad + [ + [a.shape[0] + 1], # out of bounds + [-(a.shape[0] + 1)], # out of bounds + [[2, 4], [6, 8]], # too many dimensions + ] + for bad_selection in bad_selections: + with pytest.raises(IndexError): + z.get_orthogonal_selection(bad_selection) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.oindex[bad_selection] # type: ignore[index] + + +def _test_get_orthogonal_selection_2d( + a: npt.NDArray[Any], z: Array, ix0: npt.NDArray[np.bool], ix1: npt.NDArray[np.bool] +) -> None: + selections = [ + # index both axes with array + (ix0, ix1), + # mixed indexing with array / slice + (ix0, slice(1, 5)), + (ix0, slice(1, 5, 2)), + (slice(250, 350), ix1), + (slice(250, 350, 10), ix1), + # mixed indexing with array / int + (ix0, 4), + (42, ix1), + ] + for selection in selections: + _test_get_orthogonal_selection(a, z, selection) + + +# noinspection PyStatementEffect +def test_get_orthogonal_selection_2d(store: StorePath) -> None: + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + # boolean arrays + ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + _test_get_orthogonal_selection_2d(a, z, ix0, ix1) + + # mixed int array / bool array + selections = ( + (ix0, np.nonzero(ix1)[0]), + (np.nonzero(ix0)[0], ix1), + ) + for selection in selections: + _test_get_orthogonal_selection(a, z, selection) + + # integer arrays + ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + _test_get_orthogonal_selection_2d(a, z, ix0, ix1) + ix0.sort() + ix1.sort() + _test_get_orthogonal_selection_2d(a, z, ix0, ix1) + ix0 = ix0[::-1] + ix1 = ix1[::-1] + _test_get_orthogonal_selection_2d(a, z, ix0, ix1) + + for selection_2d in basic_selections_2d: + _test_get_orthogonal_selection(a, z, selection_2d) + + for selection_2d_bad in basic_selections_2d_bad: + with pytest.raises(IndexError): + z.get_orthogonal_selection(selection_2d_bad) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.oindex[selection_2d_bad] # type: ignore[index] + + +def _test_get_orthogonal_selection_3d( + a: npt.NDArray, + z: Array, + ix0: npt.NDArray[np.bool], + ix1: npt.NDArray[np.bool], + ix2: npt.NDArray[np.bool], +) -> None: + selections = [ + # single value + (84, 42, 4), + (-1, -1, -1), + # index all axes with array + (ix0, ix1, ix2), + # mixed indexing with single array / slices + (ix0, slice(15, 25), slice(1, 5)), + (slice(50, 70), ix1, slice(1, 5)), + (slice(50, 70), slice(15, 25), ix2), + (ix0, slice(15, 25, 5), slice(1, 5, 2)), + (slice(50, 70, 3), ix1, slice(1, 5, 2)), + (slice(50, 70, 3), slice(15, 25, 5), ix2), + # mixed indexing with single array / ints + (ix0, 42, 4), + (84, ix1, 4), + (84, 42, ix2), + # mixed indexing with single array / slice 
/ int + (ix0, slice(15, 25), 4), + (42, ix1, slice(1, 5)), + (slice(50, 70), 42, ix2), + # mixed indexing with two array / slice + (ix0, ix1, slice(1, 5)), + (slice(50, 70), ix1, ix2), + (ix0, slice(15, 25), ix2), + # mixed indexing with two array / integer + (ix0, ix1, 4), + (42, ix1, ix2), + (ix0, 42, ix2), + ] + for selection in selections: + _test_get_orthogonal_selection(a, z, selection) + + +def test_get_orthogonal_selection_3d(store: StorePath) -> None: + # setup + a = np.arange(100000, dtype=int).reshape(200, 50, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(60, 20, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + # boolean arrays + ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) + _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) + + # integer arrays + ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) + _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) + ix0.sort() + ix1.sort() + ix2.sort() + _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) + ix0 = ix0[::-1] + ix1 = ix1[::-1] + ix2 = ix2[::-1] + _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) + + +def test_orthogonal_indexing_edge_cases(store: StorePath) -> None: + a = np.arange(6).reshape(1, 2, 3) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(1, 2, 3)) + + expect = oindex(a, (0, slice(None), [0, 1, 2])) + actual = z.oindex[0, :, [0, 1, 2]] + assert_array_equal(expect, actual) + + expect = oindex(a, (0, slice(None), [True, True, True])) + actual = z.oindex[0, :, [True, True, True]] + assert_array_equal(expect, actual) + + +def _test_set_orthogonal_selection( + v: npt.NDArray[np.int_], a: npt.NDArray[Any], z: Array, selection: OrthogonalSelection +) -> None: + for value in 42, oindex(v, selection), oindex(v, selection).tolist(): + if isinstance(value, list) and value == []: + # skip these cases as cannot preserve all dimensions + continue + # setup expectation + a[:] = 0 + oindex_set(a, selection, value) + # long-form API + z[:] = 0 + z.set_orthogonal_selection(selection, value) + assert_array_equal(a, z[:]) + # short-form API + z[:] = 0 + z.oindex[selection] = value + assert_array_equal(a, z[:]) + + +def test_set_orthogonal_selection_1d(store: StorePath) -> None: + # setup + v = np.arange(1050, dtype=int) + a = np.empty(v.shape, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + # test with different degrees of sparseness + np.random.seed(42) + for p in 0.5, 0.1, 0.01: + # boolean arrays + ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + _test_set_orthogonal_selection(v, a, z, ix) + + # integer arrays + ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + _test_set_orthogonal_selection(v, a, z, ix) + ix.sort() + _test_set_orthogonal_selection(v, a, z, ix) + ix = ix[::-1] + _test_set_orthogonal_selection(v, a, z, ix) + + # basic selections + for selection in basic_selections_1d: + _test_set_orthogonal_selection(v, a, z, selection) + + +def _test_set_orthogonal_selection_2d( + v: npt.NDArray[np.int_], + a: npt.NDArray[np.int_], + z: Array, + ix0: npt.NDArray[np.bool], + ix1: npt.NDArray[np.bool], +) -> None: + selections = [ + # index both axes with 
array + (ix0, ix1), + # mixed indexing with array / slice or int + (ix0, slice(1, 5)), + (slice(250, 350), ix1), + (ix0, 4), + (42, ix1), + ] + for selection in selections: + _test_set_orthogonal_selection(v, a, z, selection) + + +def test_set_orthogonal_selection_2d(store: StorePath) -> None: + # setup + v = np.arange(10000, dtype=int).reshape(1000, 10) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + # boolean arrays + ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) + + # integer arrays + ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) + ix0.sort() + ix1.sort() + _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) + ix0 = ix0[::-1] + ix1 = ix1[::-1] + _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) + + for selection in basic_selections_2d: + _test_set_orthogonal_selection(v, a, z, selection) + + +def _test_set_orthogonal_selection_3d( + v: npt.NDArray[np.int_], + a: npt.NDArray[np.int_], + z: Array, + ix0: npt.NDArray[np.bool], + ix1: npt.NDArray[np.bool], + ix2: npt.NDArray[np.bool], +) -> None: + selections = ( + # single value + (84, 42, 4), + (-1, -1, -1), + # index all axes with bool array + (ix0, ix1, ix2), + # mixed indexing with single bool array / slice or int + (ix0, slice(15, 25), slice(1, 5)), + (slice(50, 70), ix1, slice(1, 5)), + (slice(50, 70), slice(15, 25), ix2), + (ix0, 42, 4), + (84, ix1, 4), + (84, 42, ix2), + (ix0, slice(15, 25), 4), + (slice(50, 70), ix1, 4), + (slice(50, 70), 42, ix2), + # indexing with two arrays / slice + (ix0, ix1, slice(1, 5)), + # indexing with two arrays / integer + (ix0, ix1, 4), + ) + for selection in selections: + _test_set_orthogonal_selection(v, a, z, selection) + + +def test_set_orthogonal_selection_3d(store: StorePath) -> None: + # setup + v = np.arange(100000, dtype=int).reshape(200, 50, 10) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(60, 20, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + # boolean arrays + ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) + _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) + + # integer arrays + ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) + _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) + + # sorted increasing + ix0.sort() + ix1.sort() + ix2.sort() + _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) + + # sorted decreasing + ix0 = ix0[::-1] + ix1 = ix1[::-1] + ix2 = ix2[::-1] + _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) + + +def test_orthogonal_indexing_fallback_on_get_setitem(store: StorePath) -> None: + z = zarr_array_from_numpy_array(store, np.zeros((20, 20))) + z[[1, 2, 3], [1, 2, 3]] = 1 + np.testing.assert_array_equal( + z[:4, :4], + [ + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + ], + ) + 
np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) + # test broadcasting + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) + # test 1D fancy indexing + z2 = zarr_array_from_numpy_array(store, np.zeros(5)) + z2[[1, 2, 3]] = 1 + np.testing.assert_array_equal(z2[:], [0, 1, 1, 1, 0]) + + +def _test_get_coordinate_selection( + a: npt.NDArray, z: Array, selection: CoordinateSelection +) -> None: + expect = a[selection] + actual = z.get_coordinate_selection(selection) + assert_array_equal(expect, actual) + actual = z.vindex[selection] + assert_array_equal(expect, actual) + + +coordinate_selections_1d_bad = [ + # slice not supported + slice(5, 15), + slice(None), + Ellipsis, + # bad stuff + 2.3, + "foo", + b"xxx", + None, + (0, 0), + (slice(None), slice(None)), +] + + +# noinspection PyStatementEffect +def test_get_coordinate_selection_1d(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 2, 0.5, 0.1, 0.01: + n = int(a.size * p) + ix = np.random.choice(a.shape[0], size=n, replace=True) + _test_get_coordinate_selection(a, z, ix) + ix.sort() + _test_get_coordinate_selection(a, z, ix) + ix = ix[::-1] + _test_get_coordinate_selection(a, z, ix) + + selections = [ + # test single item + 42, + -1, + # test wraparound + [0, 3, 10, -23, -12, -1], + # test out of order + [3, 105, 23, 127], # not monotonically increasing + # test multi-dimensional selection + np.array([[2, 4], [6, 8]]), + ] + for selection in selections: + _test_get_coordinate_selection(a, z, selection) + + # test errors + bad_selections = coordinate_selections_1d_bad + [ + [a.shape[0] + 1], # out of bounds + [-(a.shape[0] + 1)], # out of bounds + ] + for selection in bad_selections: + with pytest.raises(IndexError): + z.get_coordinate_selection(selection) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.vindex[selection] # type: ignore[index] + + +def test_get_coordinate_selection_2d(store: StorePath) -> None: + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + ix0: npt.ArrayLike + ix1: npt.ArrayLike + # test with different degrees of sparseness + for p in 2, 0.5, 0.1, 0.01: + n = int(a.size * p) + ix0 = np.random.choice(a.shape[0], size=n, replace=True) + ix1 = np.random.choice(a.shape[1], size=n, replace=True) + selections = [ + # single value + (42, 4), + (-1, -1), + # index both axes with array + (ix0, ix1), + # mixed indexing with array / int + (ix0, 4), + (42, ix1), + (42, 4), + ] + for selection in selections: + _test_get_coordinate_selection(a, z, selection) + + # not monotonically increasing (first dim) + ix0 = [3, 3, 4, 2, 5] + ix1 = [1, 3, 5, 7, 9] + _test_get_coordinate_selection(a, z, (ix0, ix1)) + + # not monotonically increasing (second dim) + ix0 = [1, 1, 2, 2, 5] + ix1 = [1, 3, 2, 1, 0] + _test_get_coordinate_selection(a, z, (ix0, ix1)) + + # multi-dimensional selection + ix0 = np.array([[1, 1, 2], [2, 2, 5]]) + ix1 = np.array([[1, 3, 2], [1, 0, 0]]) + _test_get_coordinate_selection(a, z, (ix0, ix1)) + + with pytest.raises(IndexError): + selection = slice(5, 15), [1, 2, 3] + z.get_coordinate_selection(selection) # type:ignore[arg-type] + with pytest.raises(IndexError): + selection = [1, 2, 3], slice(5, 15) + z.get_coordinate_selection(selection) # type:ignore[arg-type] + with pytest.raises(IndexError): + selection = Ellipsis, [1, 2, 
3] + z.get_coordinate_selection(selection) # type:ignore[arg-type] + with pytest.raises(IndexError): + selection = Ellipsis + z.get_coordinate_selection(selection) # type:ignore[arg-type] + + +def _test_set_coordinate_selection( + v: npt.NDArray, a: npt.NDArray, z: Array, selection: CoordinateSelection +) -> None: + for value in 42, v[selection], v[selection].tolist(): + # setup expectation + a[:] = 0 + a[selection] = value + # test long-form API + z[:] = 0 + z.set_coordinate_selection(selection, value) + assert_array_equal(a, z[:]) + # test short-form API + z[:] = 0 + z.vindex[selection] = value + assert_array_equal(a, z[:]) + + +def test_set_coordinate_selection_1d(store: StorePath) -> None: + # setup + v = np.arange(1050, dtype=int) + a = np.empty(v.shape, dtype=v.dtype) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 2, 0.5, 0.1, 0.01: + n = int(a.size * p) + ix = np.random.choice(a.shape[0], size=n, replace=True) + _test_set_coordinate_selection(v, a, z, ix) + + # multi-dimensional selection + ix = np.array([[2, 4], [6, 8]]) + _test_set_coordinate_selection(v, a, z, ix) + + for selection in coordinate_selections_1d_bad: + with pytest.raises(IndexError): + z.set_coordinate_selection(selection, 42) # type:ignore[arg-type] + with pytest.raises(IndexError): + z.vindex[selection] = 42 # type:ignore[index] + + +def test_set_coordinate_selection_2d(store: StorePath) -> None: + # setup + v = np.arange(10000, dtype=int).reshape(1000, 10) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 2, 0.5, 0.1, 0.01: + n = int(a.size * p) + ix0 = np.random.choice(a.shape[0], size=n, replace=True) + ix1 = np.random.choice(a.shape[1], size=n, replace=True) + + selections = ( + (42, 4), + (-1, -1), + # index both axes with array + (ix0, ix1), + # mixed indexing with array / int + (ix0, 4), + (42, ix1), + ) + for selection in selections: + _test_set_coordinate_selection(v, a, z, selection) + + # multi-dimensional selection + ix0 = np.array([[1, 2, 3], [4, 5, 6]]) + ix1 = np.array([[1, 3, 2], [2, 0, 5]]) + _test_set_coordinate_selection(v, a, z, (ix0, ix1)) + + +def _test_get_block_selection( + a: npt.NDArray[Any], + z: Array, + selection: BasicSelection, + expected_idx: slice | tuple[slice, ...], +) -> None: + expect = a[expected_idx] + actual = z.get_block_selection(selection) + assert_array_equal(expect, actual) + actual = z.blocks[selection] + assert_array_equal(expect, actual) + + +block_selections_1d: list[BasicSelection] = [ + # test single item + 0, + 5, + # test wraparound + -1, + -4, + # test slice + slice(5), + slice(None, 3), + slice(5, 6), + slice(-3, -1), + slice(None), # Full slice +] + +block_selections_1d_array_projection: list[slice] = [ + # test single item + slice(100), + slice(500, 600), + # test wraparound + slice(1000, None), + slice(700, 800), + # test slice + slice(500), + slice(None, 300), + slice(500, 600), + slice(800, 1000), + slice(None), +] + +block_selections_1d_bad = [ + # slice not supported + slice(3, 8, 2), + # bad stuff + 2.3, + # "foo", # TODO + b"xxx", + None, + (0, 0), + (slice(None), slice(None)), + [0, 5, 3], +] + + +def test_get_block_selection_1d(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + for selection, expected_idx in zip( + block_selections_1d, 
block_selections_1d_array_projection, strict=True + ): + _test_get_block_selection(a, z, selection, expected_idx) + + bad_selections = block_selections_1d_bad + [ + z.metadata.chunk_grid.get_nchunks(z.shape) + 1, # out of bounds + -(z.metadata.chunk_grid.get_nchunks(z.shape) + 1), # out of bounds + ] + + for selection_bad in bad_selections: + with pytest.raises(IndexError): + z.get_block_selection(selection_bad) # type:ignore[arg-type] + with pytest.raises(IndexError): + z.blocks[selection_bad] # type:ignore[index] + + +block_selections_2d: list[BasicSelection] = [ + # test single item + (0, 0), + (1, 2), + # test wraparound + (-1, -1), + (-3, -2), + # test slice + (slice(1), slice(2)), + (slice(None, 2), slice(-2, -1)), + (slice(2, 3), slice(-2, None)), + (slice(-3, -1), slice(-3, -2)), + (slice(None), slice(None)), # Full slice +] + +block_selections_2d_array_projection: list[tuple[slice, slice]] = [ + # test single item + (slice(300), slice(3)), + (slice(300, 600), slice(6, 9)), + # test wraparound + (slice(900, None), slice(9, None)), + (slice(300, 600), slice(6, 9)), + # test slice + (slice(300), slice(6)), + (slice(None, 600), slice(6, 9)), + (slice(600, 900), slice(6, None)), + (slice(300, 900), slice(3, 6)), + (slice(None), slice(None)), # Full slice +] + + +def test_get_block_selection_2d(store: StorePath) -> None: + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + for selection, expected_idx in zip( + block_selections_2d, block_selections_2d_array_projection, strict=True + ): + _test_get_block_selection(a, z, selection, expected_idx) + + with pytest.raises(IndexError): + selection = slice(5, 15), [1, 2, 3] + z.get_block_selection(selection) + with pytest.raises(IndexError): + selection = Ellipsis, [1, 2, 3] + z.get_block_selection(selection) + with pytest.raises(IndexError): # out of bounds + selection = slice(15, 20), slice(None) + z.get_block_selection(selection) + + +def _test_set_block_selection( + v: npt.NDArray[Any], + a: npt.NDArray[Any], + z: zarr.Array, + selection: BasicSelection, + expected_idx: slice, +) -> None: + for value in 42, v[expected_idx], v[expected_idx].tolist(): + # setup expectation + a[:] = 0 + a[expected_idx] = value + # test long-form API + z[:] = 0 + z.set_block_selection(selection, value) + assert_array_equal(a, z[:]) + # test short-form API + z[:] = 0 + z.blocks[selection] = value + assert_array_equal(a, z[:]) + + +def test_set_block_selection_1d(store: StorePath) -> None: + # setup + v = np.arange(1050, dtype=int) + a = np.empty(v.shape, dtype=v.dtype) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + for selection, expected_idx in zip( + block_selections_1d, block_selections_1d_array_projection, strict=True + ): + _test_set_block_selection(v, a, z, selection, expected_idx) + + for selection_bad in block_selections_1d_bad: + with pytest.raises(IndexError): + z.set_block_selection(selection_bad, 42) # type:ignore[arg-type] + with pytest.raises(IndexError): + z.blocks[selection_bad] = 42 # type:ignore[index] + + +def test_set_block_selection_2d(store: StorePath) -> None: + # setup + v = np.arange(10000, dtype=int).reshape(1000, 10) + a = np.empty(v.shape, dtype=v.dtype) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + for selection, expected_idx in zip( + block_selections_2d, block_selections_2d_array_projection, strict=True + ): + _test_set_block_selection(v, a, z, selection, expected_idx) + + with pytest.raises(IndexError): + 
selection = slice(5, 15), [1, 2, 3] + z.set_block_selection(selection, 42) + with pytest.raises(IndexError): + selection = Ellipsis, [1, 2, 3] + z.set_block_selection(selection, 42) + with pytest.raises(IndexError): # out of bounds + selection = slice(15, 20), slice(None) + z.set_block_selection(selection, 42) + + +def _test_get_mask_selection(a: npt.NDArray[Any], z: Array, selection: npt.NDArray) -> None: + expect = a[selection] + actual = z.get_mask_selection(selection) + assert_array_equal(expect, actual) + actual = z.vindex[selection] + assert_array_equal(expect, actual) + actual = z[selection] + assert_array_equal(expect, actual) + + +mask_selections_1d_bad = [ + # slice not supported + slice(5, 15), + slice(None), + Ellipsis, + # bad stuff + 2.3, + "foo", + b"xxx", + None, + (0, 0), + (slice(None), slice(None)), +] + + +# noinspection PyStatementEffect +def test_get_mask_selection_1d(store: StorePath) -> None: + # setup + a = np.arange(1050, dtype=int) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + _test_get_mask_selection(a, z, ix) + + # test errors + bad_selections = mask_selections_1d_bad + [ + np.zeros(50, dtype=bool), # too short + np.zeros(2000, dtype=bool), # too long + [[True, False], [False, True]], # too many dimensions + ] + for selection in bad_selections: + with pytest.raises(IndexError): + z.get_mask_selection(selection) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.vindex[selection] # type:ignore[index] + + +# noinspection PyStatementEffect +def test_get_mask_selection_2d(store: StorePath) -> None: + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) + _test_get_mask_selection(a, z, ix) + + # test errors + with pytest.raises(IndexError): + z.vindex[np.zeros((1000, 5), dtype=bool)] # too short + with pytest.raises(IndexError): + z.vindex[np.zeros((2000, 10), dtype=bool)] # too long + with pytest.raises(IndexError): + z.vindex[[True, False]] # wrong no. 
dimensions + + +def _test_set_mask_selection( + v: npt.NDArray, a: npt.NDArray, z: Array, selection: npt.NDArray +) -> None: + a[:] = 0 + z[:] = 0 + a[selection] = v[selection] + z.set_mask_selection(selection, v[selection]) + assert_array_equal(a, z[:]) + z[:] = 0 + z.vindex[selection] = v[selection] + assert_array_equal(a, z[:]) + z[:] = 0 + z[selection] = v[selection] + assert_array_equal(a, z[:]) + + +def test_set_mask_selection_1d(store: StorePath) -> None: + # setup + v = np.arange(1050, dtype=int) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + _test_set_mask_selection(v, a, z, ix) + + for selection in mask_selections_1d_bad: + with pytest.raises(IndexError): + z.set_mask_selection(selection, 42) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.vindex[selection] = 42 # type: ignore[index] + + +def test_set_mask_selection_2d(store: StorePath) -> None: + # setup + v = np.arange(10000, dtype=int).reshape(1000, 10) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) + _test_set_mask_selection(v, a, z, ix) + + +def test_get_selection_out(store: StorePath) -> None: + # basic selections + a = np.arange(1050) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + + selections = [ + slice(50, 150), + slice(0, 1050), + slice(1, 2), + ] + for selection in selections: + expect = a[selection] + out = get_ndbuffer_class().from_numpy_array(np.empty(expect.shape)) + z.get_basic_selection(selection, out=out) + assert_array_equal(expect, out.as_numpy_array()[:]) + + with pytest.raises(TypeError): + z.get_basic_selection(Ellipsis, out=[]) # type: ignore[arg-type] + + # orthogonal selections + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + selections = [ + # index both axes with array + (ix0, ix1), + # mixed indexing with array / slice + (ix0, slice(1, 5)), + (slice(250, 350), ix1), + # mixed indexing with array / int + (ix0, 4), + (42, ix1), + # mixed int array / bool array + (ix0, np.nonzero(ix1)[0]), + (np.nonzero(ix0)[0], ix1), + ] + for selection in selections: + expect = oindex(a, selection) + out = get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype)) + z.get_orthogonal_selection(selection, out=out) + assert_array_equal(expect, out.as_numpy_array()[:]) + + # coordinate selections + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + np.random.seed(42) + # test with different degrees of sparseness + for p in 0.5, 0.1, 0.01: + n = int(a.size * p) + ix0 = np.random.choice(a.shape[0], size=n, replace=True) + ix1 = np.random.choice(a.shape[1], size=n, replace=True) + selections = [ + # index both axes with array + (ix0, ix1), + # mixed indexing with array / int + (ix0, 4), + (42, ix1), + ] + for selection in selections: + expect = a[selection] + out = 
get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype)) + z.get_coordinate_selection(selection, out=out) + assert_array_equal(expect, out.as_numpy_array()[:]) + + +@pytest.mark.xfail(reason="fields are not supported in v3") +def test_get_selections_with_fields(store: StorePath) -> None: + a = np.array( + [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)], + dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], + ) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(2,)) + + fields_fixture: list[str | list[str]] = [ + "foo", + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], + ] + + for fields in fields_fixture: + # total selection + expect = a[fields] + actual = z.get_basic_selection(Ellipsis, fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z[fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z[fields[0], fields[1]] + assert_array_equal(expect, actual) + if isinstance(fields, str): + actual = z[..., fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z[..., fields[0], fields[1]] + assert_array_equal(expect, actual) + + # basic selection with slice + expect = a[fields][0:2] + actual = z.get_basic_selection(slice(0, 2), fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z[0:2, fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z[0:2, fields[0], fields[1]] + assert_array_equal(expect, actual) + + # basic selection with single item + expect = a[fields][1] + actual = z.get_basic_selection(1, fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z[1, fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z[1, fields[0], fields[1]] + assert_array_equal(expect, actual) + + # orthogonal selection + ix = [0, 2] + expect = a[fields][ix] + actual = z.get_orthogonal_selection(ix, fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z.oindex[ix, fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z.oindex[ix, fields[0], fields[1]] + assert_array_equal(expect, actual) + + # coordinate selection + ix = [0, 2] + expect = a[fields][ix] + actual = z.get_coordinate_selection(ix, fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z.vindex[ix, fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z.vindex[ix, fields[0], fields[1]] + assert_array_equal(expect, actual) + + # mask selection + ix = [True, False, True] + expect = a[fields][ix] + actual = z.get_mask_selection(ix, fields=fields) + assert_array_equal(expect, actual) + # alternative API + if isinstance(fields, str): + actual = z.vindex[ix, fields] + assert_array_equal(expect, actual) + elif len(fields) == 2: + actual = z.vindex[ix, fields[0], fields[1]] + assert_array_equal(expect, actual) + + # missing/bad fields + with pytest.raises(IndexError): + z.get_basic_selection(Ellipsis, fields=["notafield"]) + with pytest.raises(IndexError): + z.get_basic_selection(Ellipsis, fields=slice(None)) # type: ignore[arg-type] + + +@pytest.mark.xfail(reason="fields are not supported in v3") +def test_set_selections_with_fields(store: StorePath) -> None: + v = np.array( + [("aaa", 1, 
4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)], + dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], + ) + a = np.empty_like(v) + z = zarr_array_from_numpy_array(store, v, chunk_shape=(2,)) + + fields_fixture: list[str | list[str]] = [ + "foo", + [], + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], + ] + + for fields in fields_fixture: + # currently multi-field assignment is not supported in numpy, so we won't support + # it either + if isinstance(fields, list) and len(fields) > 1: + with pytest.raises(IndexError): + z.set_basic_selection(Ellipsis, v, fields=fields) + with pytest.raises(IndexError): + z.set_orthogonal_selection([0, 2], v, fields=fields) # type: ignore[arg-type] + with pytest.raises(IndexError): + z.set_coordinate_selection([0, 2], v, fields=fields) + with pytest.raises(IndexError): + z.set_mask_selection([True, False, True], v, fields=fields) # type: ignore[arg-type] + + else: + if isinstance(fields, list) and len(fields) == 1: + # work around numpy does not support multi-field assignment even if there + # is only one field + key = fields[0] + elif isinstance(fields, list) and len(fields) == 0: + # work around numpy ambiguity about what is a field selection + key = Ellipsis + else: + key = fields + + # setup expectation + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) + assert_array_equal(a, z[:]) + a[key] = v[key] + # total selection + z.set_basic_selection(Ellipsis, v[key], fields=fields) + assert_array_equal(a, z[:]) + + # basic selection with slice + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) + a[key][0:2] = v[key][0:2] + z.set_basic_selection(slice(0, 2), v[key][0:2], fields=fields) + assert_array_equal(a, z[:]) + + # orthogonal selection + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) + ix = [0, 2] + a[key][ix] = v[key][ix] + z.set_orthogonal_selection(ix, v[key][ix], fields=fields) + assert_array_equal(a, z[:]) + + # coordinate selection + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) + ix = [0, 2] + a[key][ix] = v[key][ix] + z.set_coordinate_selection(ix, v[key][ix], fields=fields) + assert_array_equal(a, z[:]) + + # mask selection + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) + ix = [True, False, True] + a[key][ix] = v[key][ix] + z.set_mask_selection(ix, v[key][ix], fields=fields) + assert_array_equal(a, z[:]) + + +def test_slice_selection_uints() -> None: + arr = np.arange(24).reshape((4, 6)) + idx = np.uint64(3) + slice_sel = make_slice_selection((idx,)) + assert arr[tuple(slice_sel)].shape == (1, 6) + + +def test_numpy_int_indexing(store: StorePath) -> None: + a = np.arange(1050) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) + assert a[42] == z[42] + assert a[np.int64(42)] == z[np.int64(42)] + + +@pytest.mark.parametrize( + ("shape", "chunks", "ops"), + [ + # 1D test cases + ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), + ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]), + ( + (1070,), + (50,), + [ + ("__getitem__", (slice(200, 400),)), + ("__setitem__", (slice(200, 400, 100),)), + ], + ), + # 2D test cases + ( + (40, 50), + (5, 8), + [ + ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), + ("__setitem__", (slice(None), (slice(None)))), + ], + ), + ], +) +async def test_accessed_chunks( + shape: tuple[int, ...], chunks: tuple[int, ...], ops: list[tuple[str, tuple[slice, ...]]] +) -> None: + # Test that only the required chunks are accessed during basic selection operations + # shape: array shape + # chunks: chunk size + # ops: list of tuples with (optype, tuple of slices) + # 
optype = "__getitem__" or "__setitem__", tuple length must match number of dims + + # Use a counting dict as the backing store so we can track the items access + store = await CountingDict.open() + z = zarr_array_from_numpy_array(StorePath(store), np.zeros(shape), chunk_shape=chunks) + + for ii, (optype, slices) in enumerate(ops): + # Resolve the slices into the accessed chunks for each dimension + chunks_per_dim = [] + for N, C, sl in zip(shape, chunks, slices, strict=True): + chunk_ind = np.arange(N, dtype=int)[sl] // C + chunks_per_dim.append(np.unique(chunk_ind)) + + # Combine and generate the cartesian product to determine the chunks keys that + # will be accessed + chunks_accessed = [".".join(map(str, comb)) for comb in itertools.product(*chunks_per_dim)] + + counts_before = store.counter.copy() + + # Perform the operation + if optype == "__getitem__": + z[slices] + else: + z[slices] = ii + + # Get the change in counts + delta_counts = store.counter - counts_before + + # Check that the access counts for the operation have increased by one for all + # the chunks we expect to be included + for ci in chunks_accessed: + assert delta_counts.pop((optype, ci)) == 1 + + # If the chunk was partially written to it will also have been read once. We + # don't determine if the chunk was actually partial here, just that the + # counts are consistent that this might have happened + if optype == "__setitem__": + assert ("__getitem__", ci) not in delta_counts or delta_counts.pop( + ("__getitem__", ci) + ) == 1 + # Check that no other chunks were accessed + assert len(delta_counts) == 0 + + +@pytest.mark.parametrize( + "selection", + [ + # basic selection + [...], + [1, ...], + [slice(None)], + [1, 3], + [[1, 2, 3], 9], + [np.arange(1000)], + [slice(5, 15)], + [slice(2, 4), 4], + [[1, 3]], + # mask selection + [np.tile([True, False], (1000, 5))], + [np.full((1000, 10), False)], + # coordinate selection + [[1, 2, 3, 4], [5, 6, 7, 8]], + [[100, 200, 300], [4, 5, 6]], + ], +) +def test_indexing_equals_numpy(store: StorePath, selection: Selection) -> None: + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + # note: in python 3.10 a[*selection] is not valid unpacking syntax + expected = a[*selection,] + actual = z[*selection,] + assert_array_equal(expected, actual, err_msg=f"selection: {selection}") + + +@pytest.mark.parametrize( + "selection", + [ + [np.tile([True, False], 500), np.tile([True, False], 5)], + [np.full(1000, False), np.tile([True, False], 5)], + [np.full(1000, True), np.full(10, True)], + [np.full(1000, True), [True, False] * 5], + ], +) +def test_orthogonal_bool_indexing_like_numpy_ix( + store: StorePath, selection: list[npt.ArrayLike] +) -> None: + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) + expected = a[np.ix_(*selection)] + # note: in python 3.10 z[*selection] is not valid unpacking syntax + actual = z[*selection,] + assert_array_equal(expected, actual, err_msg=f"{selection=}") + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@pytest.mark.parametrize("origin_0d", [None, (0,), (1,)]) +@pytest.mark.parametrize("selection_shape_0d", [None, (2,), (3,)]) +def test_iter_grid( + ndim: int, origin_0d: tuple[int] | None, selection_shape_0d: tuple[int] | None +) -> None: + """ + Test that iter_grid works as expected for 1, 2, and 3 dimensions. 
+ """ + grid_shape = (5,) * ndim + + if origin_0d is not None: + origin_kwarg = origin_0d * ndim + origin = origin_kwarg + else: + origin_kwarg = None + origin = (0,) * ndim + + if selection_shape_0d is not None: + selection_shape_kwarg = selection_shape_0d * ndim + selection_shape = selection_shape_kwarg + else: + selection_shape_kwarg = None + selection_shape = tuple(gs - o for gs, o in zip(grid_shape, origin, strict=False)) + + observed = tuple( + _iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) + ) + + # generate a numpy array of indices, and index it + coord_array = np.array(list(itertools.product(*[range(s) for s in grid_shape]))).reshape( + (*grid_shape, ndim) + ) + coord_array_indexed = coord_array[ + tuple(slice(o, o + s, 1) for o, s in zip(origin, selection_shape, strict=False)) + + (range(ndim),) + ] + + expected = tuple(map(tuple, coord_array_indexed.reshape(-1, ndim).tolist())) + assert observed == expected + + +def test_iter_grid_invalid() -> None: + """ + Ensure that a selection_shape that exceeds the grid_shape + origin produces an indexing error. + """ + with pytest.raises(IndexError): + list(_iter_grid((5,), origin=(0,), selection_shape=(10,))) + + +def test_indexing_with_zarr_array(store: StorePath) -> None: + # regression test for https://github.com/zarr-developers/zarr-python/issues/2133 + a = np.arange(10) + za = zarr.array(a, chunks=2, store=store, path="a") + ix = [False, True, False, True, False, True, False, True, False, True] + ii = [0, 2, 4, 5] + + zix = zarr.array(ix, chunks=2, store=store, dtype="bool", path="ix") + zii = zarr.array(ii, chunks=2, store=store, dtype="i4", path="ii") + assert_array_equal(a[ix], za[zix]) + assert_array_equal(a[ix], za.oindex[zix]) + assert_array_equal(a[ix], za.vindex[zix]) + + assert_array_equal(a[ii], za[zii]) + assert_array_equal(a[ii], za.oindex[zii]) diff --git a/tests/v3/test_metadata/__init__.py b/tests/v3/test_metadata/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_metadata/test_consolidated.py b/tests/v3/test_metadata/test_consolidated.py new file mode 100644 index 0000000000..c0218602f6 --- /dev/null +++ b/tests/v3/test_metadata/test_consolidated.py @@ -0,0 +1,564 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +import zarr.api.asynchronous +import zarr.api.synchronous +import zarr.storage +from zarr.api.asynchronous import ( + AsyncGroup, + consolidate_metadata, + group, + open, + open_consolidated, +) +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.group import ConsolidatedMetadata, GroupMetadata +from zarr.core.metadata import ArrayV3Metadata +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.storage.common import StorePath + +if TYPE_CHECKING: + from zarr.abc.store import Store + from zarr.core.common import ZarrFormat + + +@pytest.fixture +async def memory_store_with_hierarchy(memory_store: Store) -> None: + g = await group(store=memory_store, attributes={"foo": "bar"}) + await g.create_array(name="air", shape=(1, 2, 3)) + await g.create_array(name="lat", shape=(1,)) + await g.create_array(name="lon", shape=(2,)) + await g.create_array(name="time", shape=(3,)) + + child = await g.create_group("child", attributes={"key": "child"}) + await child.create_array("array", shape=(4, 4), attributes={"key": "child"}) + + grandchild = await child.create_group("grandchild", attributes={"key": "grandchild"}) + await 
grandchild.create_array("array", shape=(4, 4), attributes={"key": "grandchild"}) + await grandchild.create_group("empty_group", attributes={"key": "empty"}) + return memory_store + + +class TestConsolidated: + async def test_open_consolidated_false_raises(self): + store = zarr.storage.MemoryStore() + with pytest.raises(TypeError, match="use_consolidated"): + await zarr.api.asynchronous.open_consolidated(store, use_consolidated=False) + + def test_open_consolidated_false_raises_sync(self): + store = zarr.storage.MemoryStore() + with pytest.raises(TypeError, match="use_consolidated"): + zarr.open_consolidated(store, use_consolidated=False) + + async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: + # TODO: Figure out desired keys in + # TODO: variety in the hierarchies + # More nesting + # arrays under arrays + # single array + # etc. + await consolidate_metadata(memory_store_with_hierarchy) + group2 = await AsyncGroup.open(memory_store_with_hierarchy) + + array_metadata = { + "attributes": {}, + "chunk_key_encoding": { + "configuration": {"separator": "/"}, + "name": "default", + }, + "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},), + "data_type": "float64", + "fill_value": np.float64(0.0), + "node_type": "array", + # "shape": (1, 2, 3), + "zarr_format": 3, + } + + expected = GroupMetadata( + attributes={"foo": "bar"}, + consolidated_metadata=ConsolidatedMetadata( + kind="inline", + must_understand=False, + metadata={ + "air": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "lat": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "lon": ArrayV3Metadata.from_dict( + { + **{"shape": (2,)}, + "chunk_grid": { + "configuration": {"chunk_shape": (2,)}, + "name": "regular", + }, + **array_metadata, + } + ), + "time": ArrayV3Metadata.from_dict( + { + **{ + "shape": (3,), + "chunk_grid": { + "configuration": {"chunk_shape": (3,)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "child": GroupMetadata( + attributes={"key": "child"}, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "array": ArrayV3Metadata.from_dict( + { + **array_metadata, + **{ + "attributes": {"key": "child"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", + }, + }, + } + ), + "grandchild": GroupMetadata( + attributes={"key": "grandchild"}, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + # known to be empty child group + "empty_group": GroupMetadata( + consolidated_metadata=ConsolidatedMetadata( + metadata={} + ), + attributes={"key": "empty"}, + ), + "array": ArrayV3Metadata.from_dict( + { + **array_metadata, + **{ + "attributes": {"key": "grandchild"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": { + "chunk_shape": (4, 4) + }, + "name": "regular", + }, + }, + } + ), + } + ), + ), + }, + ), + ), + }, + ), + ) + + assert group2.metadata == expected + group3 = await open(store=memory_store_with_hierarchy) + assert group3.metadata == expected + + group4 = await open_consolidated(store=memory_store_with_hierarchy) + assert group4.metadata == expected + + result_raw = json.loads( + ( + await memory_store_with_hierarchy.get( + "zarr.json", prototype=default_buffer_prototype() + ) + ).to_bytes() + )["consolidated_metadata"] + 
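+        # the raw zarr.json should record inline consolidated metadata, with every node keyed by its path relative to the root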
assert result_raw["kind"] == "inline" + assert sorted(result_raw["metadata"]) == [ + "air", + "child", + "child/array", + "child/grandchild", + "child/grandchild/array", + "child/grandchild/empty_group", + "lat", + "lon", + "time", + ] + + def test_consolidated_sync(self, memory_store): + g = zarr.api.synchronous.group(store=memory_store, attributes={"foo": "bar"}) + g.create_array(name="air", shape=(1, 2, 3)) + g.create_array(name="lat", shape=(1,)) + g.create_array(name="lon", shape=(2,)) + g.create_array(name="time", shape=(3,)) + + zarr.api.synchronous.consolidate_metadata(memory_store) + group2 = zarr.api.synchronous.Group.open(memory_store) + + array_metadata = { + "attributes": {}, + "chunk_key_encoding": { + "configuration": {"separator": "/"}, + "name": "default", + }, + "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},), + "data_type": "float64", + "fill_value": np.float64(0.0), + "node_type": "array", + # "shape": (1, 2, 3), + "zarr_format": 3, + } + + expected = GroupMetadata( + attributes={"foo": "bar"}, + consolidated_metadata=ConsolidatedMetadata( + kind="inline", + must_understand=False, + metadata={ + "air": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "lat": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "lon": ArrayV3Metadata.from_dict( + { + **{"shape": (2,)}, + "chunk_grid": { + "configuration": {"chunk_shape": (2,)}, + "name": "regular", + }, + **array_metadata, + } + ), + "time": ArrayV3Metadata.from_dict( + { + **{ + "shape": (3,), + "chunk_grid": { + "configuration": {"chunk_shape": (3,)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + }, + ), + ) + assert group2.metadata == expected + group3 = zarr.api.synchronous.open(store=memory_store) + assert group3.metadata == expected + + group4 = zarr.api.synchronous.open_consolidated(store=memory_store) + assert group4.metadata == expected + + async def test_not_writable_raises(self, memory_store: zarr.storage.MemoryStore) -> None: + await group(store=memory_store, attributes={"foo": "bar"}) + read_store = zarr.storage.MemoryStore(store_dict=memory_store._store_dict) + with pytest.raises(ValueError, match="does not support writing"): + await consolidate_metadata(read_store) + + async def test_non_root_node(self, memory_store_with_hierarchy: Store) -> None: + await consolidate_metadata(memory_store_with_hierarchy, path="child") + root = await AsyncGroup.open(memory_store_with_hierarchy) + child = await AsyncGroup.open(StorePath(memory_store_with_hierarchy) / "child") + + assert root.metadata.consolidated_metadata is None + assert child.metadata.consolidated_metadata is not None + assert "air" not in child.metadata.consolidated_metadata.metadata + assert "grandchild" in child.metadata.consolidated_metadata.metadata + + def test_consolidated_metadata_from_dict(self): + data = {"must_understand": False} + + # missing kind + with pytest.raises(ValueError, match="kind='None'"): + ConsolidatedMetadata.from_dict(data) + + # invalid kind + data["kind"] = "invalid" + with pytest.raises(ValueError, match="kind='invalid'"): + ConsolidatedMetadata.from_dict(data) + + # missing metadata + data["kind"] = "inline" + + with pytest.raises(TypeError, match="Unexpected type for 'metadata'"): + ConsolidatedMetadata.from_dict(data) + + 
data["kind"] = "inline" + # empty is fine + data["metadata"] = {} + ConsolidatedMetadata.from_dict(data) + + def test_flatten(self): + array_metadata = { + "attributes": {}, + "chunk_key_encoding": { + "configuration": {"separator": "/"}, + "name": "default", + }, + "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},), + "data_type": "float64", + "fill_value": np.float64(0.0), + "node_type": "array", + # "shape": (1, 2, 3), + "zarr_format": 3, + } + + metadata = ConsolidatedMetadata( + kind="inline", + must_understand=False, + metadata={ + "air": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "lat": ArrayV3Metadata.from_dict( + { + **{ + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", + }, + }, + **array_metadata, + } + ), + "child": GroupMetadata( + attributes={"key": "child"}, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "array": ArrayV3Metadata.from_dict( + { + **array_metadata, + **{ + "attributes": {"key": "child"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", + }, + }, + } + ), + "grandchild": GroupMetadata( + attributes={"key": "grandchild"}, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "array": ArrayV3Metadata.from_dict( + { + **array_metadata, + **{ + "attributes": {"key": "grandchild"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", + }, + }, + } + ) + } + ), + ), + }, + ), + ), + }, + ) + result = metadata.flattened_metadata + expected = { + "air": metadata.metadata["air"], + "lat": metadata.metadata["lat"], + "child": GroupMetadata( + attributes={"key": "child"}, consolidated_metadata=ConsolidatedMetadata(metadata={}) + ), + "child/array": metadata.metadata["child"].consolidated_metadata.metadata["array"], + "child/grandchild": GroupMetadata( + attributes={"key": "grandchild"}, + consolidated_metadata=ConsolidatedMetadata(metadata={}), + ), + "child/grandchild/array": ( + metadata.metadata["child"] + .consolidated_metadata.metadata["grandchild"] + .consolidated_metadata.metadata["array"] + ), + } + assert result == expected + + def test_invalid_metadata_raises(self): + payload = { + "kind": "inline", + "must_understand": False, + "metadata": { + "foo": [1, 2, 3] # invalid + }, + } + + with pytest.raises(TypeError, match="key='foo', type='list'"): + ConsolidatedMetadata.from_dict(payload) + + def test_to_dict_empty(self): + meta = ConsolidatedMetadata( + metadata={ + "empty": GroupMetadata( + attributes={"key": "empty"}, + consolidated_metadata=ConsolidatedMetadata(metadata={}), + ) + } + ) + result = meta.to_dict() + expected = { + "kind": "inline", + "must_understand": False, + "metadata": { + "empty": { + "attributes": {"key": "empty"}, + "consolidated_metadata": { + "kind": "inline", + "must_understand": False, + "metadata": {}, + }, + "node_type": "group", + "zarr_format": 3, + } + }, + } + assert result == expected + + @pytest.mark.parametrize("zarr_format", [2, 3]) + async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat): + store = zarr.storage.MemoryStore(mode="w") + await AsyncGroup.from_store(store, zarr_format=zarr_format) + with pytest.raises(ValueError): + await zarr.api.asynchronous.open_consolidated(store, zarr_format=zarr_format) + + with pytest.raises(ValueError): + await 
zarr.api.asynchronous.open_consolidated(store, zarr_format=None) + + async def test_consolidated_metadata_v2(self): + store = zarr.storage.MemoryStore(mode="w") + g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) + await g.create_array(name="a", shape=(1,), attributes={"key": "a"}) + g1 = await g.create_group(name="g1", attributes={"key": "g1"}) + await g1.create_group(name="g2", attributes={"key": "g2"}) + + await zarr.api.asynchronous.consolidate_metadata(store) + result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2) + + expected = GroupMetadata( + attributes={"key": "root"}, + zarr_format=2, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "a": ArrayV2Metadata( + shape=(1,), + dtype="float64", + attributes={"key": "a"}, + chunks=(1,), + fill_value=None, + order="C", + ), + "g1": GroupMetadata( + attributes={"key": "g1"}, + zarr_format=2, + consolidated_metadata=ConsolidatedMetadata( + metadata={ + "g2": GroupMetadata( + attributes={"key": "g2"}, + zarr_format=2, + consolidated_metadata=ConsolidatedMetadata(metadata={}), + ) + } + ), + ), + } + ), + ) + assert result.metadata == expected + + @pytest.mark.parametrize("zarr_format", [2, 3]) + async def test_use_consolidated_false( + self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat + ) -> None: + with zarr.config.set(default_zarr_version=zarr_format): + g = await group(store=memory_store, attributes={"foo": "bar"}) + await g.create_group(name="a") + + # test a stale read + await zarr.api.asynchronous.consolidate_metadata(memory_store) + await g.create_group(name="b") + + stale = await zarr.api.asynchronous.open_group(store=memory_store) + assert len([x async for x in stale.members()]) == 1 + assert stale.metadata.consolidated_metadata + assert list(stale.metadata.consolidated_metadata.metadata) == ["a"] + + # bypass stale data + good = await zarr.api.asynchronous.open_group( + store=memory_store, use_consolidated=False + ) + assert len([x async for x in good.members()]) == 2 + + # reconsolidate + await zarr.api.asynchronous.consolidate_metadata(memory_store) + + good = await zarr.api.asynchronous.open_group(store=memory_store) + assert len([x async for x in good.members()]) == 2 + assert good.metadata.consolidated_metadata + assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"] diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py new file mode 100644 index 0000000000..089d5c98e1 --- /dev/null +++ b/tests/v3/test_metadata/test_v2.py @@ -0,0 +1,284 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Literal + +import numpy as np +import pytest + +import zarr.api.asynchronous +import zarr.storage +from zarr.core.buffer import cpu +from zarr.core.group import ConsolidatedMetadata, GroupMetadata +from zarr.core.metadata import ArrayV2Metadata +from zarr.core.metadata.v2 import parse_zarr_format + +if TYPE_CHECKING: + from typing import Any + + from zarr.abc.codec import Codec + +import numcodecs + + +def test_parse_zarr_format_valid() -> None: + assert parse_zarr_format(2) == 2 + + +@pytest.mark.parametrize("data", [None, 1, 3, 4, 5, "3"]) +def test_parse_zarr_format_invalid(data: Any) -> None: + with pytest.raises(ValueError, match=f"Invalid value. Expected 2. 
Got {data}"): + parse_zarr_format(data) + + +@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) +@pytest.mark.parametrize("filters", [None, (), (numcodecs.GZip(),)]) +@pytest.mark.parametrize("compressor", [None, numcodecs.GZip()]) +@pytest.mark.parametrize("fill_value", [None, 0, 1]) +@pytest.mark.parametrize("order", ["C", "F"]) +@pytest.mark.parametrize("dimension_separator", [".", "/", None]) +def test_metadata_to_dict( + compressor: Codec | None, + filters: tuple[Codec] | None, + fill_value: Any, + order: Literal["C", "F"], + dimension_separator: Literal[".", "/"] | None, + attributes: None | dict[str, Any], +) -> None: + shape = (1, 2, 3) + chunks = (1,) * len(shape) + data_type = "|u1" + metadata_dict = { + "zarr_format": 2, + "shape": shape, + "chunks": chunks, + "dtype": data_type, + "order": order, + "compressor": compressor, + "filters": filters, + "fill_value": fill_value, + } + + if attributes is not None: + metadata_dict["attributes"] = attributes + if dimension_separator is not None: + metadata_dict["dimension_separator"] = dimension_separator + + metadata = ArrayV2Metadata.from_dict(metadata_dict) + observed = metadata.to_dict() + expected = metadata_dict.copy() + + if attributes is None: + assert observed["attributes"] == {} + observed.pop("attributes") + + if dimension_separator is None: + expected_dimension_sep = "." + assert observed["dimension_separator"] == expected_dimension_sep + observed.pop("dimension_separator") + + assert observed == expected + + +class TestConsolidated: + @pytest.fixture + async def v2_consolidated_metadata( + self, memory_store: zarr.storage.MemoryStore + ) -> zarr.storage.MemoryStore: + zmetadata = { + "metadata": { + ".zattrs": { + "Conventions": "COARDS", + }, + ".zgroup": {"zarr_format": 2}, + "air/.zarray": { + "chunks": [730], + "compressor": None, + "dtype": " None: + data = { + "_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"}, + "attributes": {"key": "value"}, + "chunks": [8], + "compressor": None, + "dtype": " None: + with pytest.raises( + ValueError, match=f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'." + ): + parse_zarr_format(data) + + +def test_parse_zarr_format_valid() -> None: + assert parse_zarr_format(3) == 3 + + +def test_parse_node_type_valid() -> None: + assert parse_node_type("array") == "array" + assert parse_node_type("group") == "group" + + +@pytest.mark.parametrize("node_type", [None, 2, "other"]) +def test_parse_node_type_invalid(node_type: Any) -> None: + with pytest.raises( + MetadataValidationError, + match=f"Invalid value for 'node_type'. Expected 'array or group'. Got '{node_type}'.", + ): + parse_node_type(node_type) + + +@pytest.mark.parametrize("data", [None, "group"]) +def test_parse_node_type_array_invalid(data: Any) -> None: + with pytest.raises( + ValueError, match=f"Invalid value for 'node_type'. Expected 'array'. Got '{data}'." 
+ ): + parse_node_type_array(data) + + +def test_parse_node_typev_array_alid() -> None: + assert parse_node_type_array("array") == "array" + + +@pytest.mark.parametrize("data", [(), [1, 2, "a"], {"foo": 10}]) +def parse_dimension_names_invalid(data: Any) -> None: + with pytest.raises(TypeError, match="Expected either None or iterable of str,"): + parse_dimension_names(data) + + +@pytest.mark.parametrize("data", [None, ("a", "b", "c"), ["a", "a", "a"]]) +def parse_dimension_names_valid(data: Sequence[str] | None) -> None: + assert parse_dimension_names(data) == data + + +@pytest.mark.parametrize("dtype_str", dtypes) +def test_default_fill_value(dtype_str: str) -> None: + """ + Test that parse_fill_value(None, dtype) results in the 0 value for the given dtype. + """ + dtype = DataType(dtype_str) + fill_value = default_fill_value(dtype) + if dtype == DataType.string: + assert fill_value == "" + elif dtype == DataType.bytes: + assert fill_value == b"" + else: + assert fill_value == dtype.to_numpy().type(0) + + +@pytest.mark.parametrize( + ("fill_value", "dtype_str"), + [ + (True, "bool"), + (False, "bool"), + (-8, "int8"), + (0, "int16"), + (1e10, "uint64"), + (-999, "float32"), + (1e32, "float64"), + (float("NaN"), "float64"), + (np.nan, "float64"), + (np.inf, "float64"), + (-1 * np.inf, "float64"), + (0j, "complex64"), + ], +) +def test_parse_fill_value_valid(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) casts fill_value to the given dtype. + """ + parsed = parse_fill_value(fill_value, dtype_str) + + if np.isnan(fill_value): + assert np.isnan(parsed) + else: + assert parsed == DataType(dtype_str).to_numpy().type(fill_value) + + +@pytest.mark.parametrize("fill_value", ["not a valid value"]) +@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) +def test_parse_fill_value_invalid_value(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) raises ValueError for invalid values. + This test excludes bool because the bool constructor takes anything. + """ + with pytest.raises(ValueError): + parse_fill_value(fill_value, dtype_str) + + +@pytest.mark.parametrize("fill_value", [[1.0, 0.0], [0, 1], complex(1, 1), np.complex64(0)]) +@pytest.mark.parametrize("dtype_str", [*complex_dtypes]) +def test_parse_fill_value_complex(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) correctly handles complex values represented + as length-2 sequences + """ + dtype = DataType(dtype_str) + if isinstance(fill_value, list): + expected = dtype.to_numpy().type(complex(*fill_value)) + else: + expected = dtype.to_numpy().type(fill_value) + assert expected == parse_fill_value(fill_value, dtype_str) + + +@pytest.mark.parametrize("fill_value", [[1.0, 0.0, 3.0], [0, 1, 3], [1]]) +@pytest.mark.parametrize("dtype_str", [*complex_dtypes]) +def test_parse_fill_value_complex_invalid(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) correctly rejects sequences with length not + equal to 2 + """ + match = ( + f"Got an invalid fill value for complex data type {dtype_str}." + f"Expected a sequence with 2 elements, but {fill_value} has " + f"length {len(fill_value)}." 
+ ) + with pytest.raises(ValueError, match=re.escape(match)): + parse_fill_value(fill_value=fill_value, dtype=dtype_str) + + +@pytest.mark.parametrize("fill_value", [{"foo": 10}]) +@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) +def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid non-sequential types. + This test excludes bool because the bool constructor takes anything. + """ + with pytest.raises(ValueError, match=r"fill value .* is not valid for dtype .*"): + parse_fill_value(fill_value, dtype_str) + + +@pytest.mark.parametrize( + "fill_value", + [ + [ + 1, + ], + (1, 23, 4), + ], +) +@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes]) +def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) -> None: + """ + Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid sequential types. + This test excludes bool because the bool constructor takes anything, and complex because + complex values can be created from length-2 sequences. + """ + match = f"Cannot parse non-string sequence {fill_value} as a scalar with type {dtype_str}" + with pytest.raises(TypeError, match=re.escape(match)): + parse_fill_value(fill_value, dtype_str) + + +@pytest.mark.parametrize("chunk_grid", ["regular"]) +@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) +@pytest.mark.parametrize("codecs", [[BytesCodec()]]) +@pytest.mark.parametrize("fill_value", [0, 1]) +@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) +@pytest.mark.parametrize("dimension_separator", [".", "/", None]) +@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"]) +@pytest.mark.parametrize("storage_transformers", [None, ()]) +def test_metadata_to_dict( + chunk_grid: str, + codecs: list[Codec], + fill_value: Any, + chunk_key_encoding: Literal["v2", "default"], + dimension_separator: Literal[".", "/"] | None, + dimension_names: Literal["nones", "strings", "missing"], + attributes: None | dict[str, Any], + storage_transformers: None | tuple[dict[str, JSON]], +) -> None: + shape = (1, 2, 3) + data_type = DataType.uint8 + if chunk_grid == "regular": + cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}} + + cke: dict[str, Any] + cke_name_dict = {"name": chunk_key_encoding} + if dimension_separator is not None: + cke = cke_name_dict | {"configuration": {"separator": dimension_separator}} + else: + cke = cke_name_dict + dnames: tuple[str | None, ...] 
| None + + if dimension_names == "strings": + dnames = tuple(map(str, range(len(shape)))) + elif dimension_names == "missing": + dnames = None + elif dimension_names == "nones": + dnames = (None,) * len(shape) + + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": shape, + "chunk_grid": cgrid, + "data_type": data_type, + "chunk_key_encoding": cke, + "codecs": tuple(c.to_dict() for c in codecs), + "fill_value": fill_value, + "storage_transformers": storage_transformers, + } + + if attributes is not None: + metadata_dict["attributes"] = attributes + if dnames is not None: + metadata_dict["dimension_names"] = dnames + + metadata = ArrayV3Metadata.from_dict(metadata_dict) + observed = metadata.to_dict() + expected = metadata_dict.copy() + + # if unset or None or (), storage_transformers gets normalized to () + assert observed["storage_transformers"] == () + observed.pop("storage_transformers") + expected.pop("storage_transformers") + + if attributes is None: + assert observed["attributes"] == {} + observed.pop("attributes") + + if dimension_separator is None: + if chunk_key_encoding == "default": + expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict() + else: + expected_cke_dict = V2ChunkKeyEncoding(separator=".").to_dict() + assert observed["chunk_key_encoding"] == expected_cke_dict + observed.pop("chunk_key_encoding") + expected.pop("chunk_key_encoding") + assert observed == expected + + +# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) +# @pytest.mark.parametrize("precision", ["ns", "D"]) +# async def test_datetime_metadata(fill_value: int, precision: str) -> None: +# metadata_dict = { +# "zarr_format": 3, +# "node_type": "array", +# "shape": (1,), +# "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, +# "data_type": f" None: + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": (1,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": " None: + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": (1,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": data_type, + "chunk_key_encoding": {"name": "default", "separator": "."}, + "codecs": (), + "fill_value": fill_value, # this is not a valid fill value for uint8 + } + with pytest.raises(ValueError, match=r"fill value .* is not valid for dtype .*"): + ArrayV3Metadata.from_dict(metadata_dict) + + +@pytest.mark.parametrize("fill_value", [("NaN"), "Infinity", "-Infinity"]) +async def test_special_float_fill_values(fill_value: str) -> None: + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": (1,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "float64", + "chunk_key_encoding": {"name": "default", "separator": "."}, + "codecs": [{"name": "bytes"}], + "fill_value": fill_value, # this is not a valid fill value for uint8 + } + m = ArrayV3Metadata.from_dict(metadata_dict) + d = json.loads(m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()) + assert m.fill_value is not None + if fill_value == "NaN": + assert np.isnan(m.fill_value) + assert d["fill_value"] == "NaN" + elif fill_value == "Infinity": + assert np.isposinf(m.fill_value) + assert d["fill_value"] == "Infinity" + elif fill_value == "-Infinity": + assert np.isneginf(m.fill_value) + assert d["fill_value"] == "-Infinity" + + +@pytest.mark.parametrize("dtype_str", dtypes) +def test_dtypes(dtype_str: str) -> None: + dt = 
DataType(dtype_str) + np_dtype = dt.to_numpy() + if dtype_str not in vlen_dtypes: + # we can round trip "normal" dtypes + assert dt == DataType.from_numpy(np_dtype) + assert dt.byte_count == np_dtype.itemsize + assert dt.has_endianness == (dt.byte_count > 1) + else: + # return type for vlen types may vary depending on numpy version + assert dt.byte_count is None diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py new file mode 100644 index 0000000000..380a4d851e --- /dev/null +++ b/tests/v3/test_properties.py @@ -0,0 +1,70 @@ +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +pytest.importorskip("hypothesis") + +import hypothesis.extra.numpy as npst # noqa: E402 +import hypothesis.strategies as st # noqa: E402 +from hypothesis import given # noqa: E402 + +from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 + + +@given(data=st.data(), zarr_format=zarr_formats) +def test_roundtrip(data: st.DataObject, zarr_format: int) -> None: + nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) + zarray = data.draw(arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))) + assert_array_equal(nparray, zarray[:]) + + +@given(data=st.data()) +def test_basic_indexing(data: st.DataObject) -> None: + zarray = data.draw(arrays()) + nparray = zarray[:] + indexer = data.draw(basic_indices(shape=nparray.shape)) + actual = zarray[indexer] + assert_array_equal(nparray[indexer], actual) + + new_data = np.ones_like(actual) + zarray[indexer] = new_data + nparray[indexer] = new_data + assert_array_equal(nparray, zarray[:]) + + +@given(data=st.data()) +def test_vindex(data: st.DataObject) -> None: + zarray = data.draw(arrays()) + nparray = zarray[:] + + indexer = data.draw( + npst.integer_array_indices( + shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None) + ) + ) + actual = zarray.vindex[indexer] + assert_array_equal(nparray[indexer], actual) + + +# @st.composite +# def advanced_indices(draw, *, shape): +# basic_idxr = draw( +# basic_indices( +# shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False +# ).filter(lambda x: isinstance(x, tuple)) +# ) + +# int_idxr = draw( +# npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1)) +# ) +# args = tuple( +# st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None)) +# ) +# return draw(st.tuples(*args)) + + +# @given(st.data()) +# def test_roundtrip_object_array(data): +# nparray = data.draw(np_arrays) +# zarray = data.draw(arrays(arrays=st.just(nparray))) +# assert_array_equal(nparray, zarray[:]) diff --git a/tests/v3/test_store/__init__.py b/tests/v3/test_store/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py new file mode 100644 index 0000000000..b2a8292ea9 --- /dev/null +++ b/tests/v3/test_store/test_core.py @@ -0,0 +1,67 @@ +import tempfile +from pathlib import Path + +import pytest + +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore +from zarr.storage.remote import RemoteStore + + +async def test_make_store_path(tmpdir: str) -> None: + # None + store_path = await make_store_path(None) + assert isinstance(store_path.store, MemoryStore) + + # str + store_path = await make_store_path(str(tmpdir)) + assert isinstance(store_path.store, LocalStore) + assert 
Path(store_path.store.root) == Path(tmpdir) + + # Path + store_path = await make_store_path(Path(tmpdir)) + assert isinstance(store_path.store, LocalStore) + assert Path(store_path.store.root) == Path(tmpdir) + + # Store + store_path = await make_store_path(store_path.store) + assert isinstance(store_path.store, LocalStore) + assert Path(store_path.store.root) == Path(tmpdir) + + # StorePath + store_path = await make_store_path(store_path) + assert isinstance(store_path.store, LocalStore) + assert Path(store_path.store.root) == Path(tmpdir) + + with pytest.raises(TypeError): + await make_store_path(1) # type: ignore[arg-type] + + +async def test_make_store_path_fsspec(monkeypatch) -> None: + import fsspec.implementations.memory + + monkeypatch.setattr(fsspec.implementations.memory.MemoryFileSystem, "async_impl", True) + store_path = await make_store_path("memory://") + assert isinstance(store_path.store, RemoteStore) + + +@pytest.mark.parametrize( + "store_like", + [ + None, + str(tempfile.TemporaryDirectory()), + Path(tempfile.TemporaryDirectory().name), + StorePath(store=MemoryStore(store_dict={}, mode="w"), path="/"), + MemoryStore(store_dict={}, mode="w"), + {}, + ], +) +async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> None: + with pytest.raises(TypeError, match="storage_options"): + await make_store_path(store_like, storage_options={"foo": "bar"}) + + +async def test_unsupported() -> None: + with pytest.raises(TypeError, match="Unsupported type for store_like: 'int'"): + await make_store_path(1) # type: ignore[arg-type] diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py new file mode 100644 index 0000000000..5352e3520a --- /dev/null +++ b/tests/v3/test_store/test_local.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import zarr +from zarr.core.buffer import Buffer, cpu +from zarr.storage.local import LocalStore +from zarr.testing.store import StoreTests + +if TYPE_CHECKING: + import pathlib + + +class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): + store_cls = LocalStore + buffer_cls = cpu.Buffer + + async def get(self, store: LocalStore, key: str) -> Buffer: + return self.buffer_cls.from_bytes((store.root / key).read_bytes()) + + async def set(self, store: LocalStore, key: str, value: Buffer) -> None: + parent = (store.root / key).parent + if not parent.exists(): + parent.mkdir(parents=True) + (store.root / key).write_bytes(value.to_bytes()) + + @pytest.fixture + def store_kwargs(self, tmpdir) -> dict[str, str]: + return {"root": str(tmpdir), "mode": "r+"} + + def test_store_repr(self, store: LocalStore) -> None: + assert str(store) == f"file://{store.root!s}" + + def test_store_supports_writes(self, store: LocalStore) -> None: + assert store.supports_writes + + def test_store_supports_partial_writes(self, store: LocalStore) -> None: + assert store.supports_partial_writes + + def test_store_supports_listing(self, store: LocalStore) -> None: + assert store.supports_listing + + async def test_empty_with_empty_subdir(self, store: LocalStore) -> None: + assert await store.empty() + (store.root / "foo/bar").mkdir(parents=True) + assert await store.empty() + + def test_creates_new_directory(self, tmp_path: pathlib.Path): + target = tmp_path.joinpath("a", "b", "c") + assert not target.exists() + + store = self.store_cls(root=target, mode="w") + zarr.group(store=store) diff --git a/tests/v3/test_store/test_logging.py b/tests/v3/test_store/test_logging.py new 
file mode 100644 index 0000000000..0258244c50 --- /dev/null +++ b/tests/v3/test_store/test_logging.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import zarr +import zarr.storage +from zarr.core.buffer import default_buffer_prototype +from zarr.storage.logging import LoggingStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +async def test_logging_store(store: Store, caplog) -> None: + wrapped = LoggingStore(store=store, log_level="DEBUG") + buffer = default_buffer_prototype().buffer + + caplog.clear() + res = await wrapped.set("foo/bar/c/0", buffer.from_bytes(b"\x01\x02\x03\x04")) + assert res is None + assert len(caplog.record_tuples) == 2 + for tup in caplog.record_tuples: + assert str(store) in tup[0] + assert f"Calling {type(store).__name__}.set" in caplog.record_tuples[0][2] + assert f"Finished {type(store).__name__}.set" in caplog.record_tuples[1][2] + + caplog.clear() + keys = [k async for k in wrapped.list()] + assert keys == ["foo/bar/c/0"] + assert len(caplog.record_tuples) == 2 + for tup in caplog.record_tuples: + assert str(store) in tup[0] + assert f"Calling {type(store).__name__}.list" in caplog.record_tuples[0][2] + assert f"Finished {type(store).__name__}.list" in caplog.record_tuples[1][2] + + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +async def test_logging_store_counter(store: Store) -> None: + wrapped = LoggingStore(store=store, log_level="DEBUG") + + arr = zarr.create(shape=(10,), store=wrapped, overwrite=True) + arr[:] = 1 + + assert wrapped.counter["set"] == 2 + assert wrapped.counter["get"] == 0 # 1 if overwrite=False + assert wrapped.counter["list"] == 0 + assert wrapped.counter["list_dir"] == 0 + assert wrapped.counter["list_prefix"] == 0 + + +async def test_with_mode(): + wrapped = LoggingStore(store=zarr.storage.MemoryStore(mode="w"), log_level="INFO") + new = wrapped.with_mode(mode="r") + assert new.mode.str == "r" + assert new.log_level == "INFO" diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py new file mode 100644 index 0000000000..bcd9fc4448 --- /dev/null +++ b/tests/v3/test_store/test_memory.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import pytest + +from zarr.core.buffer import Buffer, cpu, gpu +from zarr.storage.memory import GpuMemoryStore, MemoryStore +from zarr.testing.store import StoreTests +from zarr.testing.utils import gpu_test + + +class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): + store_cls = MemoryStore + buffer_cls = cpu.Buffer + + async def set(self, store: MemoryStore, key: str, value: Buffer) -> None: + store._store_dict[key] = value + + async def get(self, store: MemoryStore, key: str) -> Buffer: + return store._store_dict[key] + + @pytest.fixture(params=[None, True]) + def store_kwargs( + self, request: pytest.FixtureRequest + ) -> dict[str, str | None | dict[str, Buffer]]: + kwargs = {"store_dict": None, "mode": "r+"} + if request.param is True: + kwargs["store_dict"] = {} + return kwargs + + @pytest.fixture + def store(self, store_kwargs: str | None | dict[str, Buffer]) -> MemoryStore: + return self.store_cls(**store_kwargs) + + def test_store_repr(self, store: MemoryStore) -> None: + assert str(store) == f"memory://{id(store._store_dict)}" + + def test_store_supports_writes(self, store: MemoryStore) -> None: + assert store.supports_writes + + def 
test_store_supports_listing(self, store: MemoryStore) -> None: + assert store.supports_listing + + def test_store_supports_partial_writes(self, store: MemoryStore) -> None: + assert store.supports_partial_writes + + def test_list_prefix(self, store: MemoryStore) -> None: + assert True + + +@gpu_test +class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]): + store_cls = GpuMemoryStore + buffer_cls = gpu.Buffer + + async def set(self, store: GpuMemoryStore, key: str, value: Buffer) -> None: + store._store_dict[key] = value + + async def get(self, store: MemoryStore, key: str) -> Buffer: + return store._store_dict[key] + + @pytest.fixture(params=[None, True]) + def store_kwargs( + self, request: pytest.FixtureRequest + ) -> dict[str, str | None | dict[str, Buffer]]: + kwargs = {"store_dict": None, "mode": "r+"} + if request.param is True: + kwargs["store_dict"] = {} + return kwargs + + @pytest.fixture + def store(self, store_kwargs: str | None | dict[str, gpu.Buffer]) -> GpuMemoryStore: + return self.store_cls(**store_kwargs) + + def test_store_repr(self, store: GpuMemoryStore) -> None: + assert str(store) == f"gpumemory://{id(store._store_dict)}" + + def test_store_supports_writes(self, store: GpuMemoryStore) -> None: + assert store.supports_writes + + def test_store_supports_listing(self, store: GpuMemoryStore) -> None: + assert store.supports_listing + + def test_store_supports_partial_writes(self, store: GpuMemoryStore) -> None: + assert store.supports_partial_writes + + def test_list_prefix(self, store: GpuMemoryStore) -> None: + assert True + + def test_dict_reference(self, store: GpuMemoryStore) -> None: + store_dict = {} + result = GpuMemoryStore(store_dict=store_dict) + assert result._store_dict is store_dict + + def test_from_dict(self): + d = { + "a": gpu.Buffer.from_bytes(b"aaaa"), + "b": cpu.Buffer.from_bytes(b"bbbb"), + } + result = GpuMemoryStore.from_dict(d) + for v in result._store_dict.values(): + assert type(v) is gpu.Buffer diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py new file mode 100644 index 0000000000..c8e9a162b0 --- /dev/null +++ b/tests/v3/test_store/test_remote.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import json +import os +from typing import TYPE_CHECKING + +import fsspec +import pytest +from botocore.session import Session +from upath import UPath + +import zarr.api.asynchronous +from zarr.core.buffer import Buffer, cpu, default_buffer_prototype +from zarr.core.sync import _collect_aiterator, sync +from zarr.storage import RemoteStore +from zarr.testing.store import StoreTests + +if TYPE_CHECKING: + from collections.abc import Generator + + import botocore.client + + +s3fs = pytest.importorskip("s3fs") +requests = pytest.importorskip("requests") +moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server") +moto = pytest.importorskip("moto") + +# ### amended from s3fs ### # +test_bucket_name = "test" +secure_bucket_name = "test-secure" +port = 5555 +endpoint_url = f"http://127.0.0.1:{port}/" + + +@pytest.fixture(scope="module") +def s3_base() -> Generator[None, None, None]: + # writable local S3 system + + # This fixture is module-scoped, meaning that we can reuse the MotoServer across all tests + server = moto_server.ThreadedMotoServer(ip_address="127.0.0.1", port=port) + server.start() + if "AWS_SECRET_ACCESS_KEY" not in os.environ: + os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" + if "AWS_ACCESS_KEY_ID" not in os.environ: + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + + yield + 
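+    # teardown: stop the shared moto server once all tests in the module have finished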
server.stop() + + +def get_boto3_client() -> botocore.client.BaseClient: + # NB: we use the sync botocore client for setup + session = Session() + return session.create_client("s3", endpoint_url=endpoint_url) + + +@pytest.fixture(autouse=True) +def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: + """ + Quoting Martin Durant: + pytest-asyncio creates a new event loop for each async test. + When an async-mode s3fs instance is made from async, it will be assigned to the loop from + which it is made. That means that if you use s3fs again from a subsequent test, + you will have the same identical instance, but be running on a different loop - which fails. + + For the rest: it's very convenient to clean up the state of the store between tests, + make sure we start off blank each time. + + https://github.com/zarr-developers/zarr-python/pull/1785#discussion_r1634856207 + """ + client = get_boto3_client() + client.create_bucket(Bucket=test_bucket_name, ACL="public-read") + s3fs.S3FileSystem.clear_instance_cache() + s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_url}) + session = sync(s3.set_session()) + s3.invalidate_cache() + yield s3 + requests.post(f"{endpoint_url}/moto-api/reset") + client.close() + sync(session.close()) + + +# ### end from s3fs ### # + + +async def test_basic() -> None: + store = RemoteStore.from_url( + f"s3://{test_bucket_name}", + mode="w", + storage_options={"endpoint_url": endpoint_url, "anon": False}, + ) + assert await _collect_aiterator(store.list()) == () + assert not await store.exists("foo") + data = b"hello" + await store.set("foo", cpu.Buffer.from_bytes(data)) + assert await store.exists("foo") + assert (await store.get("foo", prototype=default_buffer_prototype())).to_bytes() == data + out = await store.get_partial_values( + prototype=default_buffer_prototype(), key_ranges=[("foo", (1, None))] + ) + assert out[0].to_bytes() == data[1:] + + +class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): + store_cls = RemoteStore + buffer_cls = cpu.Buffer + + @pytest.fixture + def store_kwargs(self, request) -> dict[str, str | bool]: + fs, path = fsspec.url_to_fs( + f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False + ) + return {"fs": fs, "path": path, "mode": "r+"} + + @pytest.fixture + def store(self, store_kwargs: dict[str, str | bool]) -> RemoteStore: + return self.store_cls(**store_kwargs) + + async def get(self, store: RemoteStore, key: str) -> Buffer: + # make a new, synchronous instance of the filesystem because this test is run in sync code + new_fs = fsspec.filesystem( + "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False + ) + return self.buffer_cls.from_bytes(new_fs.cat(f"{store.path}/{key}")) + + async def set(self, store: RemoteStore, key: str, value: Buffer) -> None: + # make a new, synchronous instance of the filesystem because this test is run in sync code + new_fs = fsspec.filesystem( + "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False + ) + new_fs.write_bytes(f"{store.path}/{key}", value.to_bytes()) + + def test_store_repr(self, store: RemoteStore) -> None: + assert str(store) == "" + + def test_store_supports_writes(self, store: RemoteStore) -> None: + assert store.supports_writes + + def test_store_supports_partial_writes(self, store: RemoteStore) -> None: + assert not store.supports_partial_writes + + def test_store_supports_listing(self, store: RemoteStore) -> None: + assert store.supports_listing + + async def 
test_remote_store_from_uri( + self, store: RemoteStore, store_kwargs: dict[str, str | bool] + ): + storage_options = { + "endpoint_url": endpoint_url, + "anon": False, + } + + meta = {"attributes": {"key": "value"}, "zarr_format": 3, "node_type": "group"} + + await store.set( + "zarr.json", + self.buffer_cls.from_bytes(json.dumps(meta).encode()), + ) + group = await zarr.api.asynchronous.open_group( + store=f"s3://{test_bucket_name}", storage_options=storage_options + ) + assert dict(group.attrs) == {"key": "value"} + + meta["attributes"]["key"] = "value-2" + await store.set( + "directory-2/zarr.json", + self.buffer_cls.from_bytes(json.dumps(meta).encode()), + ) + group = await zarr.api.asynchronous.open_group( + store=f"s3://{test_bucket_name}/directory-2", storage_options=storage_options + ) + assert dict(group.attrs) == {"key": "value-2"} + + meta["attributes"]["key"] = "value-3" + await store.set( + "directory-3/zarr.json", + self.buffer_cls.from_bytes(json.dumps(meta).encode()), + ) + group = await zarr.api.asynchronous.open_group( + store=f"s3://{test_bucket_name}", path="directory-3", storage_options=storage_options + ) + assert dict(group.attrs) == {"key": "value-3"} + + def test_from_upath(self) -> None: + path = UPath(f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False) + result = RemoteStore.from_upath(path) + assert result.fs.endpoint_url == endpoint_url + + async def test_empty_nonexistent_path(self, store_kwargs) -> None: + # regression test for https://github.com/zarr-developers/zarr-python/pull/2343 + store_kwargs["mode"] = "w-" + store_kwargs["path"] += "/abc" + store = await self.store_cls.open(**store_kwargs) + assert await store.empty() diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py new file mode 100644 index 0000000000..9ac3bbc3f6 --- /dev/null +++ b/tests/v3/test_store/test_stateful_store.py @@ -0,0 +1,249 @@ +# Stateful tests for arbitrary Zarr stores. +import hypothesis.strategies as st +import pytest +from hypothesis import assume, note +from hypothesis.stateful import ( + RuleBasedStateMachine, + Settings, + initialize, + invariant, + precondition, + rule, + run_state_machine_as_test, +) +from hypothesis.strategies import DataObject + +import zarr +from zarr.abc.store import AccessMode, Store +from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype +from zarr.storage import LocalStore, ZipStore +from zarr.testing.strategies import key_ranges +from zarr.testing.strategies import keys as zarr_keys + +MAX_BINARY_SIZE = 100 + + +class SyncStoreWrapper(zarr.core.sync.SyncMixin): + def __init__(self, store: Store) -> None: + """Synchronous Store wrapper + + This class holds synchronous methods that map to async methods of Store classes. + The synchronous wrapper is needed because hypothesis' stateful testing infra does + not support asyncio so we redefine sync versions of the Store API. 
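+        Each wrapper method simply runs the corresponding async Store coroutine through SyncMixin.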
+        https://github.com/HypothesisWorks/hypothesis/issues/3712#issuecomment-1668999041
+        """
+        self.store = store
+
+    @property
+    def mode(self) -> AccessMode:
+        return self.store.mode
+
+    def set(self, key: str, data_buffer: zarr.core.buffer.Buffer) -> None:
+        return self._sync(self.store.set(key, data_buffer))
+
+    def list(self) -> list:
+        return self._sync_iter(self.store.list())
+
+    def get(self, key: str, prototype: BufferPrototype) -> zarr.core.buffer.Buffer:
+        return self._sync(self.store.get(key, prototype=prototype))
+
+    def get_partial_values(
+        self, key_ranges: list, prototype: BufferPrototype
+    ) -> zarr.core.buffer.Buffer:
+        return self._sync(self.store.get_partial_values(prototype=prototype, key_ranges=key_ranges))
+
+    def delete(self, path: str) -> None:
+        return self._sync(self.store.delete(path))
+
+    def empty(self) -> bool:
+        return self._sync(self.store.empty())
+
+    def clear(self) -> None:
+        return self._sync(self.store.clear())
+
+    def exists(self, key) -> bool:
+        return self._sync(self.store.exists(key))
+
+    def list_dir(self, prefix):
+        raise NotImplementedError
+
+    def list_prefix(self, prefix: str):
+        raise NotImplementedError
+
+    def set_partial_values(self, key_start_values):
+        raise NotImplementedError
+
+    @property
+    def supports_listing(self) -> bool:
+        return self.store.supports_listing
+
+    @property
+    def supports_partial_writes(self) -> bool:
+        # delegate to the wrapped store; returning self.supports_partial_writes here
+        # would recurse forever
+        return self.store.supports_partial_writes
+
+    @property
+    def supports_writes(self) -> bool:
+        return self.store.supports_writes
+
+
+class ZarrStoreStateMachine(RuleBasedStateMachine):
+    """
+    Zarr store state machine
+
+    This is a subclass of a Hypothesis RuleBasedStateMachine.
+    It is a testing framework that ensures the state of a Zarr store matches
+    an expected state after a set of random operations. It contains a store
+    (currently, a Zarr MemoryStore) and a model, a simplified version of a
+    zarr store (in this case, a dict). It also contains rules which represent
+    actions that can be applied to a zarr store. Rules apply an action to both
+    the store and the model, and invariants assert that the state of the model
+    is equal to the state of the store. Hypothesis then generates sequences of
+    rules, running invariants after each rule. It raises an error if a sequence
+    produces a discrepancy between the state of the model and the state of the
+    store (i.e. an invariant is violated).
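+
+    The machine is driven via ``run_state_machine_as_test`` (see
+    ``test_zarr_hierarchy`` at the bottom of this module); when an invariant fails,
+    Hypothesis reports a shrunk sequence of rules such as
+    ``init_store(); set(...); delete(...)`` that reproduces the mismatch.
+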
+ https://hypothesis.readthedocs.io/en/latest/stateful.html + """ + + def __init__(self, store: Store) -> None: + super().__init__() + self.model: dict[str, bytes] = {} + self.store = SyncStoreWrapper(store) + self.prototype = default_buffer_prototype() + + @initialize() + def init_store(self): + self.store.clear() + + @rule(key=zarr_keys, data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE)) + def set(self, key: str, data: DataObject) -> None: + note(f"(set) Setting {key!r} with {data}") + assert not self.store.mode.readonly + data_buf = cpu.Buffer.from_bytes(data) + self.store.set(key, data_buf) + self.model[key] = data_buf + + @precondition(lambda self: len(self.model.keys()) > 0) + @rule(key=zarr_keys, data=st.data()) + def get(self, key: str, data: DataObject) -> None: + key = data.draw( + st.sampled_from(sorted(self.model.keys())) + ) # hypothesis wants to sample from sorted list + note("(get)") + store_value = self.store.get(key, self.prototype) + # to bytes here necessary because data_buf set to model in set() + assert self.model[key].to_bytes() == (store_value.to_bytes()) + + @rule(key=zarr_keys, data=st.data()) + def get_invalid_zarr_keys(self, key: str, data: DataObject) -> None: + note("(get_invalid)") + assume(key not in self.model) + assert self.store.get(key, self.prototype) is None + + @precondition(lambda self: len(self.model.keys()) > 0) + @rule(data=st.data()) + def get_partial_values(self, data: DataObject) -> None: + key_range = data.draw( + key_ranges(keys=st.sampled_from(sorted(self.model.keys())), max_size=MAX_BINARY_SIZE) + ) + note(f"(get partial) {key_range=}") + obs_maybe = self.store.get_partial_values(key_range, self.prototype) + observed = [] + + for obs in obs_maybe: + assert obs is not None + observed.append(obs.to_bytes()) + + model_vals_ls = [] + + for key, byte_range in key_range: + start = byte_range[0] or 0 + step = byte_range[1] + stop = start + step if step is not None else None + model_vals_ls.append(self.model[key][start:stop]) + + assert all( + obs == exp.to_bytes() for obs, exp in zip(observed, model_vals_ls, strict=True) + ), ( + observed, + model_vals_ls, + ) + + @precondition(lambda self: len(self.model.keys()) > 0) + @rule(data=st.data()) + def delete(self, data: DataObject) -> None: + key = data.draw(st.sampled_from(sorted(self.model.keys()))) + note(f"(delete) Deleting {key=}") + + self.store.delete(key) + del self.model[key] + + @rule() + def clear(self) -> None: + assert not self.store.mode.readonly + note("(clear)") + self.store.clear() + self.model.clear() + + assert self.store.empty() + + assert len(self.model.keys()) == len(list(self.store.list())) == 0 + + @rule() + # Local store can be non-empty when there are subdirectories but no files + @precondition(lambda self: not isinstance(self.store.store, LocalStore)) + def empty(self) -> None: + note("(empty)") + + # make sure they either both are or both aren't empty (same state) + assert self.store.empty() == (not self.model) + + @rule(key=zarr_keys) + def exists(self, key: str) -> None: + note("(exists)") + + assert self.store.exists(key) == (key in self.model) + + @invariant() + def check_paths_equal(self) -> None: + note("Checking that paths are equal") + paths = sorted(self.store.list()) + + assert sorted(self.model.keys()) == paths + + @invariant() + def check_vals_equal(self) -> None: + note("Checking values equal") + for key, val in self.model.items(): + store_item = self.store.get(key, self.prototype).to_bytes() + assert val.to_bytes() == store_item + + @invariant() + def 
check_num_zarr_keys_equal(self) -> None: + note("check num zarr_keys equal") + + assert len(self.model) == len(list(self.store.list())) + + @invariant() + def check_zarr_keys(self) -> None: + keys = list(self.store.list()) + + if not keys: + assert self.store.empty() is True + + else: + assert self.store.empty() is False + + for key in keys: + assert self.store.exists(key) is True + note("checking keys / exists / empty") + + +def test_zarr_hierarchy(sync_store: Store) -> None: + def mk_test_instance_sync() -> None: + return ZarrStoreStateMachine(sync_store) + + if isinstance(sync_store, ZipStore): + pytest.skip(reason="ZipStore does not support delete") + if isinstance(sync_store, LocalStore): + pytest.skip(reason="This test has errors") + run_state_machine_as_test(mk_test_instance_sync, settings=Settings(report_multiple_bugs=True)) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py new file mode 100644 index 0000000000..d05422ecde --- /dev/null +++ b/tests/v3/test_store/test_zip.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import os +import tempfile +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +import zarr +from zarr.abc.store import AccessMode +from zarr.core.buffer import Buffer, cpu, default_buffer_prototype +from zarr.storage.zip import ZipStore +from zarr.testing.store import StoreTests + +if TYPE_CHECKING: + from collections.abc import Coroutine + from typing import Any + + +class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): + store_cls = ZipStore + buffer_cls = cpu.Buffer + + @pytest.fixture + def store_kwargs(self, request) -> dict[str, str | bool]: + fd, temp_path = tempfile.mkstemp() + os.close(fd) + + return {"path": temp_path, "mode": "w"} + + async def get(self, store: ZipStore, key: str) -> Buffer: + return store._get(key, prototype=default_buffer_prototype()) + + async def set(self, store: ZipStore, key: str, value: Buffer) -> None: + return store._set(key, value) + + def test_store_mode(self, store: ZipStore, store_kwargs: dict[str, Any]) -> None: + assert store.mode == AccessMode.from_literal(store_kwargs["mode"]) + assert not store.mode.readonly + + async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: + # we need to create the zipfile in write mode before switching to read mode + store = await self.store_cls.open(**store_kwargs) + store.close() + + kwargs = {**store_kwargs, "mode": "r"} + store = await self.store_cls.open(**kwargs) + assert store.mode == AccessMode.from_literal("r") + assert store.mode.readonly + + # set + with pytest.raises(ValueError): + await store.set("foo", cpu.Buffer.from_bytes(b"bar")) + + def test_store_repr(self, store: ZipStore) -> None: + assert str(store) == f"zip://{store.path!s}" + + def test_store_supports_writes(self, store: ZipStore) -> None: + assert store.supports_writes + + def test_store_supports_partial_writes(self, store: ZipStore) -> None: + assert store.supports_partial_writes is False + + def test_store_supports_listing(self, store: ZipStore) -> None: + assert store.supports_listing + + def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]: + pass + + def test_api_integration(self, store: ZipStore) -> None: + root = zarr.open_group(store=store) + + data = np.arange(10000, dtype=np.uint16).reshape(100, 100) + z = root.create_array( + shape=data.shape, chunks=(10, 10), name="foo", dtype=np.uint16, fill_value=99 + ) + z[:] = data + + assert np.array_equal(data, z[:]) + + # you can overwrite existing chunks but 
zipfile will issue a warning + with pytest.warns(UserWarning, match="Duplicate name: 'foo/c/0/0'"): + z[0, 0] = 100 + + # TODO: assigning an entire chunk to fill value ends up deleting the chunk which is not supported + # a work around will be needed here. + with pytest.raises(NotImplementedError): + z[0:10, 0:10] = 99 + + bar = root.create_group("bar", attributes={"hello": "world"}) + assert "hello" in dict(bar.attrs) + + # keys cannot be deleted + with pytest.raises(NotImplementedError): + del root["bar"] + + store.close() + + async def test_with_mode(self, store: ZipStore) -> None: + with pytest.raises(NotImplementedError, match="new mode"): + await super().test_with_mode(store) + + @pytest.mark.parametrize("mode", ["a", "w"]) + async def test_store_open_mode(self, store_kwargs: dict[str, Any], mode: str) -> None: + super().test_store_open_mode(store_kwargs, mode) diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py new file mode 100644 index 0000000000..20dbf33d95 --- /dev/null +++ b/tests/v3/test_sync.py @@ -0,0 +1,161 @@ +import asyncio +from collections.abc import AsyncGenerator +from unittest.mock import AsyncMock, patch + +import pytest + +import zarr +from zarr.core.sync import ( + SyncError, + SyncMixin, + _get_executor, + _get_lock, + _get_loop, + cleanup_resources, + sync, +) +from zarr.storage.memory import MemoryStore + + +@pytest.fixture(params=[True, False]) +def sync_loop(request: pytest.FixtureRequest) -> asyncio.AbstractEventLoop | None: + if request.param is True: + return _get_loop() + else: + return None + + +@pytest.fixture +def clean_state(): + # use this fixture to make sure no existing threads/loops exist in zarr.core.sync + cleanup_resources() + yield + cleanup_resources() + + +def test_get_loop() -> None: + # test that calling _get_loop() twice returns the same loop + loop = _get_loop() + loop2 = _get_loop() + assert loop is loop2 + + +def test_get_lock() -> None: + # test that calling _get_lock() twice returns the same lock + lock = _get_lock() + lock2 = _get_lock() + assert lock is lock2 + + +def test_sync(sync_loop: asyncio.AbstractEventLoop | None) -> None: + foo = AsyncMock(return_value="foo") + assert sync(foo(), loop=sync_loop) == "foo" + foo.assert_awaited_once() + + +def test_sync_raises(sync_loop: asyncio.AbstractEventLoop | None) -> None: + foo = AsyncMock(side_effect=ValueError("foo-bar")) + with pytest.raises(ValueError, match="foo-bar"): + sync(foo(), loop=sync_loop) + foo.assert_awaited_once() + + +def test_sync_timeout() -> None: + duration = 0.002 + + async def foo() -> None: + await asyncio.sleep(duration) + + with pytest.raises(asyncio.TimeoutError): + sync(foo(), timeout=duration / 2) + + +def test_sync_raises_if_no_coroutine(sync_loop: asyncio.AbstractEventLoop | None) -> None: + def foo() -> str: + return "foo" + + with pytest.raises(TypeError): + sync(foo(), loop=sync_loop) # type: ignore[arg-type] + + +@pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") +def test_sync_raises_if_loop_is_closed() -> None: + loop = _get_loop() + + foo = AsyncMock(return_value="foo") + with patch.object(loop, "is_closed", return_value=True): + with pytest.raises(RuntimeError): + sync(foo(), loop=loop) + foo.assert_not_awaited() + + +@pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") +def test_sync_raises_if_calling_sync_from_within_a_running_loop( + sync_loop: asyncio.AbstractEventLoop | None, +) -> None: + def foo() -> str: + # technically, this should be an async function but doing that + # yields a warning because it 
is never awaited by the inner function + return "foo" + + async def bar() -> str: + return sync(foo(), loop=sync_loop) # type: ignore[arg-type] + + with pytest.raises(SyncError): + sync(bar(), loop=sync_loop) + + +@pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") +def test_sync_raises_if_loop_is_invalid_type() -> None: + foo = AsyncMock(return_value="foo") + with pytest.raises(TypeError): + sync(foo(), loop=1) # type: ignore[arg-type] + foo.assert_not_awaited() + + +def test_sync_mixin(sync_loop) -> None: + class AsyncFoo: + def __init__(self) -> None: + pass + + async def foo(self) -> str: + return "foo" + + async def bar(self) -> AsyncGenerator: + for i in range(10): + yield i + + class SyncFoo(SyncMixin): + def __init__(self, async_foo: AsyncFoo) -> None: + self._async_foo = async_foo + + def foo(self) -> str: + return self._sync(self._async_foo.foo()) + + def bar(self) -> list[int]: + return self._sync_iter(self._async_foo.bar()) + + async_foo = AsyncFoo() + foo = SyncFoo(async_foo) + assert foo.foo() == "foo" + assert foo.bar() == list(range(10)) + + +def test_open_positional_args_deprecate(): + store = MemoryStore({}, mode="w") + with pytest.warns(FutureWarning, match="pass"): + zarr.open(store, "w", shape=(1,)) + + +@pytest.mark.parametrize("workers", [None, 1, 2]) # +def test_get_executor(clean_state, workers) -> None: + with zarr.config.set({"threading.max_workers": workers}): + e = _get_executor() + if workers is not None and workers != 0: + assert e._max_workers == workers + + +def test_cleanup_resources_idempotent() -> None: + _get_executor() # trigger resource creation (iothread, loop, thread-pool) + cleanup_resources() + cleanup_resources() diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py new file mode 100644 index 0000000000..729ed0533f --- /dev/null +++ b/tests/v3/test_v2.py @@ -0,0 +1,125 @@ +import json +from collections.abc import Iterator +from typing import Any + +import numcodecs.vlen +import numpy as np +import pytest +from numcodecs import Delta +from numcodecs.blosc import Blosc + +import zarr +import zarr.core.buffer.cpu +import zarr.core.metadata +import zarr.storage +from zarr import Array +from zarr.storage import MemoryStore, StorePath + + +@pytest.fixture +async def store() -> Iterator[StorePath]: + return StorePath(await MemoryStore.open(mode="w")) + + +def test_simple(store: StorePath) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + store / "simple_v2", + zarr_format=2, + shape=data.shape, + chunks=(16, 16), + dtype=data.dtype, + fill_value=0, + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) + + +@pytest.mark.parametrize( + ("dtype", "fill_value"), + [ + ("bool", False), + ("int64", 0), + ("float64", 0.0), + ("|S1", b""), + ("|U1", ""), + ("object", ""), + (str, ""), + ], +) +def test_implicit_fill_value(store: StorePath, dtype: str, fill_value: Any) -> None: + arr = zarr.open_array(store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype) + assert arr.metadata.fill_value is None + assert arr.metadata.to_dict()["fill_value"] is None + result = arr[:] + if dtype is str: + # special case + numpy_dtype = np.dtype(object) + else: + numpy_dtype = np.dtype(dtype) + expected = np.full(arr.shape, fill_value, dtype=numpy_dtype) + np.testing.assert_array_equal(result, expected) + + +def test_codec_pipeline() -> None: + # https://github.com/zarr-developers/zarr-python/issues/2243 + store = MemoryStore(mode="w") + array = zarr.create( + store=store, + shape=(1,), + dtype="i4", 
+ zarr_format=2, + filters=[Delta(dtype="i4").get_config()], + compressor=Blosc().get_config(), + ) + array[:] = 1 + result = array[:] + expected = np.ones(1) + np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["|S", "|V"]) +async def test_v2_encode_decode(dtype): + store = zarr.storage.MemoryStore(mode="w") + g = zarr.group(store=store, zarr_format=2) + g.create_array( + name="foo", + shape=(3,), + chunks=(3,), + dtype=dtype, + fill_value=b"X", + ) + + result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype()) + assert result is not None + + serialized = json.loads(result.to_bytes()) + expected = { + "chunks": [3], + "compressor": None, + "dtype": f"{dtype}0", + "fill_value": "WA==", + "filters": None, + "order": "C", + "shape": [3], + "zarr_format": 2, + "dimension_separator": ".", + } + assert serialized == expected + + data = zarr.open_array(store=store, path="foo")[:] + expected = np.full((3,), b"X", dtype=dtype) + np.testing.assert_equal(data, expected) + + +@pytest.mark.parametrize("dtype", [str, "str"]) +async def test_create_dtype_str(dtype: Any) -> None: + arr = zarr.create(shape=3, dtype=dtype, zarr_format=2) + assert arr.dtype.kind == "O" + assert arr.metadata.to_dict()["dtype"] == "|O" + assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),) + arr[:] = ["a", "bb", "ccc"] + result = arr[:] + np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object")) diff --git a/zarr/__init__.py b/zarr/__init__.py deleted file mode 100644 index 6cecb40af8..0000000000 --- a/zarr/__init__.py +++ /dev/null @@ -1,70 +0,0 @@ -# flake8: noqa -from zarr.codecs import * -from zarr.convenience import ( - consolidate_metadata, - copy, - copy_all, - copy_store, - load, - open, - open_consolidated, - save, - save_array, - save_group, - tree, -) -from zarr.core import Array -from zarr.creation import ( - array, - create, - empty, - empty_like, - full, - full_like, - ones, - ones_like, - open_array, - open_like, - zeros, - zeros_like, -) -from zarr.errors import CopyError, MetadataError -from zarr.hierarchy import Group, group, open_group -from zarr.n5 import N5Store, N5FSStore -from zarr._storage.store import v3_api_available -from zarr.storage import ( - ABSStore, - DBMStore, - DictStore, - DirectoryStore, - KVStore, - LMDBStore, - LRUStoreCache, - MemoryStore, - MongoDBStore, - NestedDirectoryStore, - RedisStore, - SQLiteStore, - TempStore, - ZipStore, -) -from zarr.sync import ProcessSynchronizer, ThreadSynchronizer -from zarr.version import version as __version__ - -# in case setuptools scm screw up and find version to be 0.0.0 -assert not __version__.startswith("0.0.0") - -if v3_api_available: - from zarr._storage.v3 import ( - ABSStoreV3, - DBMStoreV3, - KVStoreV3, - DirectoryStoreV3, - LMDBStoreV3, - LRUStoreCacheV3, - MemoryStoreV3, - MongoDBStoreV3, - RedisStoreV3, - SQLiteStoreV3, - ZipStoreV3, - ) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py deleted file mode 100644 index 1e49754f38..0000000000 --- a/zarr/_storage/absstore.py +++ /dev/null @@ -1,293 +0,0 @@ -"""This module contains storage classes related to Azure Blob Storage (ABS)""" - -from typing import Optional -import warnings - -from numcodecs.compat import ensure_bytes -from zarr.util import normalize_storage_path -from zarr._storage.store import ( - _get_metadata_suffix, - data_root, - meta_root, - Store, - StoreV3, - V3_DEPRECATION_MESSAGE, -) -from zarr.types import DIMENSION_SEPARATOR - -__doctest_requires__ = { - 
("ABSStore", "ABSStore.*"): ["azure.storage.blob"], -} - - -class ABSStore(Store): - """Storage class using Azure Blob Storage (ABS). - - Parameters - ---------- - container : string - The name of the ABS container to use. - - .. deprecated:: - Use ``client`` instead. - - prefix : string - Location of the "directory" to use as the root of the storage hierarchy - within the container. - - account_name : string - The Azure blob storage account name. - - .. deprecated:: 2.8.3 - Use ``client`` instead. - - account_key : string - The Azure blob storage account access key. - - .. deprecated:: 2.8.3 - Use ``client`` instead. - - blob_service_kwargs : dictionary - Extra arguments to be passed into the azure blob client, for e.g. when - using the emulator, pass in blob_service_kwargs={'is_emulated': True}. - - .. deprecated:: 2.8.3 - Use ``client`` instead. - - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - client : azure.storage.blob.ContainerClient, optional - And ``azure.storage.blob.ContainerClient`` to connect with. See - `here `_ # noqa - for more. - - .. versionadded:: 2.8.3 - - Notes - ----- - In order to use this store, you must install the Microsoft Azure Storage SDK for Python, - ``azure-storage-blob>=12.5.0``. - """ # noqa: E501 - - def __init__( - self, - container=None, - prefix="", - account_name=None, - account_key=None, - blob_service_kwargs=None, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - client=None, - ): - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=3, - ) - - self._dimension_separator = dimension_separator - self.prefix = normalize_storage_path(prefix) - if client is None: - # deprecated option, try to construct the client for them - msg = ( - "Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs'" - "is deprecated. Provide and instance of 'azure.storage.blob.ContainerClient' " - "'client' instead." - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - from azure.storage.blob import ContainerClient - - blob_service_kwargs = blob_service_kwargs or {} - client = ContainerClient( - f"https://{account_name}.blob.core.windows.net/", - container, - credential=account_key, - **blob_service_kwargs, - ) - - self.client = client - self._container = container - self._account_name = account_name - self._account_key = account_key - - @staticmethod - def _warn_deprecated(property_): - msg = ( - "The {} property is deprecated and will be removed in a future " - "version. Get the property from 'ABSStore.client' instead." 
- ) - warnings.warn(msg.format(property_), FutureWarning, stacklevel=3) - - @property - def container(self): - self._warn_deprecated("container") - return self._container - - @property - def account_name(self): - self._warn_deprecated("account_name") - return self._account_name - - @property - def account_key(self): - self._warn_deprecated("account_key") - return self._account_key - - def _append_path_to_prefix(self, path): - if self.prefix == "": - return normalize_storage_path(path) - else: - return "/".join([self.prefix, normalize_storage_path(path)]) - - @staticmethod - def _strip_prefix_from_path(path, prefix): - # normalized things will not have any leading or trailing slashes - path_norm = normalize_storage_path(path) - prefix_norm = normalize_storage_path(prefix) - if prefix: - return path_norm[(len(prefix_norm) + 1) :] - else: - return path_norm - - def __getitem__(self, key): - from azure.core.exceptions import ResourceNotFoundError - - blob_name = self._append_path_to_prefix(key) - try: - return self.client.download_blob(blob_name).readall() - except ResourceNotFoundError as e: - raise KeyError(f"Blob {blob_name} not found") from e - - def __setitem__(self, key, value): - value = ensure_bytes(value) - blob_name = self._append_path_to_prefix(key) - self.client.upload_blob(blob_name, value, overwrite=True) - - def __delitem__(self, key): - from azure.core.exceptions import ResourceNotFoundError - - try: - self.client.delete_blob(self._append_path_to_prefix(key)) - except ResourceNotFoundError as e: - raise KeyError(f"Blob {key} not found") from e - - def __eq__(self, other): - return ( - isinstance(other, ABSStore) - and self.client == other.client - and self.prefix == other.prefix - ) - - def keys(self): - return list(self.__iter__()) - - def __iter__(self): - if self.prefix: - list_blobs_prefix = self.prefix + "/" - else: - list_blobs_prefix = None - for blob in self.client.list_blobs(list_blobs_prefix): - yield self._strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - return len(self.keys()) - - def __contains__(self, key): - blob_name = self._append_path_to_prefix(key) - return self.client.get_blob_client(blob_name).exists() - - def listdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path)) - if dir_path: - dir_path += "/" - items = [ - self._strip_prefix_from_path(blob.name, dir_path) - for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter="/") - ] - return items - - def rmdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path)) - if dir_path: - dir_path += "/" - for blob in self.client.list_blobs(name_starts_with=dir_path): - self.client.delete_blob(blob) - - def getsize(self, path=None): - store_path = normalize_storage_path(path) - fs_path = self._append_path_to_prefix(store_path) - if fs_path: - blob_client = self.client.get_blob_client(fs_path) - else: - blob_client = None - - if blob_client and blob_client.exists(): - return blob_client.get_blob_properties().size - else: - size = 0 - if fs_path == "": - fs_path = None - elif not fs_path.endswith("/"): - fs_path += "/" - for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter="/"): - blob_client = self.client.get_blob_client(blob) - if blob_client.exists(): - size += blob_client.get_blob_properties().size - return size - - def clear(self): - self.rmdir() - - -class ABSStoreV3(ABSStore, StoreV3): - def list(self): - return list(self.keys()) - - def __eq__(self, other): - return ( - 
isinstance(other, ABSStoreV3) - and self.client == other.client - and self.prefix == other.prefix - ) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def rmdir(self, path=None): - if not path: - # Currently allowing clear to delete everything as in v2 - - # If we disallow an empty path then we will need to modify - # TestABSStoreV3 to have the create_store method use a prefix. - ABSStore.rmdir(self, "") - return - - meta_dir = meta_root + path - meta_dir = meta_dir.rstrip("/") - ABSStore.rmdir(self, meta_dir) - - # remove data folder - data_dir = data_root + path - data_dir = data_dir.rstrip("/") - ABSStore.rmdir(self, data_dir) - - # remove metadata files - sfx = _get_metadata_suffix(self) - array_meta_file = meta_dir + ".array" + sfx - if array_meta_file in self: - del self[array_meta_file] - group_meta_file = meta_dir + ".group" + sfx - if group_meta_file in self: - del self[group_meta_file] - - # TODO: adapt the v2 getsize method to work for v3 - # For now, calling the generic keys-based _getsize - def getsize(self, path=None): - from zarr.storage import _getsize # avoid circular import - - return _getsize(self, path) - - -ABSStoreV3.__doc__ = ABSStore.__doc__ diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py deleted file mode 100644 index dba29d13c0..0000000000 --- a/zarr/_storage/store.py +++ /dev/null @@ -1,715 +0,0 @@ -import abc -import os -import warnings -from collections import defaultdict -from collections.abc import MutableMapping -from copy import copy -from string import ascii_letters, digits -from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Union - -from zarr.meta import Metadata2, Metadata3 -from zarr.util import normalize_storage_path -from zarr.context import Context -from zarr.types import ZARR_VERSION - -# v2 store keys -array_meta_key = ".zarray" -group_meta_key = ".zgroup" -attrs_key = ".zattrs" - -# v3 paths -meta_root = "meta/root/" -data_root = "data/root/" - -DEFAULT_ZARR_VERSION: ZARR_VERSION = 2 - -v3_api_available = os.environ.get("ZARR_V3_EXPERIMENTAL_API", "0").lower() not in ["0", "false"] -_has_warned_about_v3 = False # to avoid printing the warning multiple times - -V3_DEPRECATION_MESSAGE = ( - "The {store} is deprecated and will be removed in a Zarr-Python version 3, see " - "https://github.com/zarr-developers/zarr-python/issues/1274 for more information." -) - - -def assert_zarr_v3_api_available(): - # we issue a warning about the experimental v3 implementation when it is first used - global _has_warned_about_v3 - if v3_api_available and not _has_warned_about_v3: - warnings.warn( - "The experimental Zarr V3 implementation in this version of Zarr-Python is not " - "in alignment with the final V3 specification. This version will be removed in " - "Zarr-Python 3 in favor of a spec compliant version.", - FutureWarning, - stacklevel=1, - ) - _has_warned_about_v3 = True - if not v3_api_available: - raise NotImplementedError( - "# V3 reading and writing is experimental! To enable support, set:\n" - "ZARR_V3_EXPERIMENTAL_API=1" - ) # pragma: no cover - - -class BaseStore(MutableMapping): - """Abstract base class for store implementations. - - This is a thin wrapper over MutableMapping that provides methods to check - whether a store is readable, writeable, eraseable and or listable. 
- - Stores cannot be mutable mapping as they do have a couple of other - requirements that would break Liskov substitution principle (stores only - allow strings as keys, mutable mapping are more generic). - - Having no-op base method also helps simplifying store usage and do not need - to check the presence of attributes and methods, like `close()`. - - Stores can be used as context manager to make sure they close on exit. - - .. added: 2.11.0 - - """ - - _readable = True - _writeable = True - _erasable = True - _listable = True - _store_version = 2 - _metadata_class = Metadata2 - - def is_readable(self): - return self._readable - - def is_writeable(self): - return self._writeable - - def is_listable(self): - return self._listable - - def is_erasable(self): - return self._erasable - - def __enter__(self): - if not hasattr(self, "_open_count"): - self._open_count = 0 - self._open_count += 1 - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._open_count -= 1 - if self._open_count == 0: - self.close() - - def close(self) -> None: - """Do nothing by default""" - pass - - def rename(self, src_path: str, dst_path: str) -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rename"' - ) # pragma: no cover - _rename_from_keys(self, src_path, dst_path) - - @staticmethod - def _ensure_store(store: Any): - """ - We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``BaseStore``, which is slightly - different than ``MutableMapping``. - - We'll do this conversion in a few places automatically - """ - from zarr.storage import KVStore # avoid circular import - - if isinstance(store, BaseStore): - if not store._store_version == 2: - raise ValueError( - f"cannot initialize a v2 store with a v{store._store_version} store" - ) - return store - elif isinstance(store, MutableMapping): - return KVStore(store) - else: - for attr in [ - "keys", - "values", - "get", - "__setitem__", - "__getitem__", - "__delitem__", - "__contains__", - ]: - if not hasattr(store, attr): - break - else: - return KVStore(store) - - raise ValueError( - "Starting with Zarr 2.11.0, stores must be subclasses of " - "BaseStore, if your store exposes the MutableMapping interface " - f"wrap it in Zarr.storage.KVStore. Got {store}" - ) - - def getitems( - self, keys: Sequence[str], *, contexts: Mapping[str, Context] - ) -> Mapping[str, Any]: - """Retrieve data from multiple keys. - - Parameters - ---------- - keys : Iterable[str] - The keys to retrieve - contexts: Mapping[str, Context] - A mapping of keys to their context. Each context is a mapping of store - specific information. E.g. a context could be a dict telling the store - the preferred output array type: `{"meta_array": cupy.empty(())}` - - Returns - ------- - Mapping - A collection mapping the input keys to their results. - - Notes - ----- - This default implementation uses __getitem__() to read each key sequentially and - ignores contexts. Overwrite this method to implement concurrent reads of multiple - keys and/or to utilize the contexts. - """ - return {k: self[k] for k in keys if k in self} - - -class Store(BaseStore): - """Abstract store class used by implementations following the Zarr v2 spec. - - Adds public `listdir`, `rename`, and `rmdir` methods on top of BaseStore. - - .. 
added: 2.11.0 - - """ - - def listdir(self, path: str = "") -> List[str]: - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) - - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - -class StoreV3(BaseStore): - _store_version = 3 - _metadata_class = Metadata3 - _valid_key_characters = set(ascii_letters + digits + "/.-_") - - def _valid_key(self, key: str) -> bool: - """ - Verify that a key conforms to the specification. - - A key is any string containing only character in the range a-z, A-Z, - 0-9, or in the set /.-_ it will return True if that's the case, False - otherwise. - """ - if not isinstance(key, str) or not key.isascii(): - return False - if set(key) - self._valid_key_characters: - return False - return True - - def _validate_key(self, key: str): - """ - Verify that a key conforms to the v3 specification. - - A key is any string containing only character in the range a-z, A-Z, - 0-9, or in the set /.-_ it will return True if that's the case, False - otherwise. - - In spec v3, keys can only start with the prefix meta/, data/ or be - exactly zarr.json and should not end with /. This should not be exposed - to the user, and is a store implementation detail, so this method will - raise a ValueError in that case. - """ - if not self._valid_key(key): - raise ValueError( - f"Keys must be ascii strings and may only contain the " - f"characters {''.join(sorted(self._valid_key_characters))}" - ) - - if ( - not key.startswith(("data/", "meta/")) - and key != "zarr.json" - # TODO: Possibly allow key == ".zmetadata" too if we write a - # consolidated metadata spec corresponding to this? - ): - raise ValueError(f"key starts with unexpected value: `{key}`") - - if key.endswith("/"): - raise ValueError("keys may not end in /") - - def list_prefix(self, prefix): - if prefix.startswith("/"): - raise ValueError("prefix must not begin with /") - # TODO: force prefix to end with /? - return [k for k in self.list() if k.startswith(prefix)] - - def erase(self, key): - self.__delitem__(key) - - def erase_prefix(self, prefix): - assert prefix.endswith("/") - - if prefix == "/": - all_keys = self.list() - else: - all_keys = self.list_prefix(prefix) - for key in all_keys: - self.erase(key) - - def list_dir(self, prefix): - """ - TODO: carefully test this with trailing/leading slashes - """ - if prefix: # allow prefix = "" ? - assert prefix.endswith("/") - - all_keys = self.list_prefix(prefix) - len_prefix = len(prefix) - keys = [] - prefixes = [] - for k in all_keys: - trail = k[len_prefix:] - if "/" not in trail: - keys.append(prefix + trail) - else: - prefixes.append(prefix + trail.split("/", maxsplit=1)[0] + "/") - return keys, list(set(prefixes)) - - def list(self): - return list(self.keys()) - - def __contains__(self, key): - return key in self.list() - - @abc.abstractmethod - def __setitem__(self, key, value): - """Set a value.""" - - @abc.abstractmethod - def __getitem__(self, key): - """Get a value.""" - - @abc.abstractmethod - def rmdir(self, path=None): - """Remove a data path and all its subkeys and related metadata. 
- Expects a path without the data or meta root prefix.""" - - @property - def supports_efficient_get_partial_values(self): - return False - - def get_partial_values( - self, key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] - ) -> List[Union[bytes, memoryview, bytearray]]: - """Get multiple partial values. - key_ranges can be an iterable of key, range pairs, - where a range specifies two integers range_start and range_length - as a tuple, (range_start, range_length). - range_length may be None to indicate to read until the end. - range_start may be negative to start reading range_start bytes - from the end of the file. - A key may occur multiple times with different ranges. - Inserts None for missing keys into the returned list.""" - results: List[Union[bytes, memoryview, bytearray]] = [None] * len(key_ranges) # type: ignore[list-item] # noqa: E501 - indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = defaultdict( - list - ) - for i, (key, range_) in enumerate(key_ranges): - indexed_ranges_by_key[key].append((i, range_)) - for key, indexed_ranges in indexed_ranges_by_key.items(): - try: - value = self[key] - except KeyError: # pragma: no cover - continue - for i, (range_from, range_length) in indexed_ranges: - if range_length is None: - results[i] = value[range_from:] - else: - results[i] = value[range_from : range_from + range_length] - return results - - def supports_efficient_set_partial_values(self): - return False - - def set_partial_values(self, key_start_values): - """Set multiple partial values. - key_start_values can be an iterable of key, start and value triplets - as tuples, (key, start, value), where start defines the offset in bytes. - A key may occur multiple times with different starts and non-overlapping values. - Also, start may only be beyond the current value if other values fill the gap. - start may be negative to start writing start bytes from the current - end of the file, ending the file with the new value.""" - unique_keys = set(next(zip(*key_start_values))) - values = {} - for key in unique_keys: - old_value = self.get(key) - values[key] = None if old_value is None else bytearray(old_value) - for key, start, value in key_start_values: - if values[key] is None: - assert start == 0 - values[key] = value - else: - if start > len(values[key]): # pragma: no cover - raise ValueError( - f"Cannot set value at start {start}, " - + f"since it is beyond the data at key {key}, " - + f"having length {len(values[key])}." - ) - if start < 0: - values[key][start:] = value - else: - values[key][start : start + len(value)] = value - for key, value in values.items(): - self[key] = value - - def clear(self): - """Remove all items from store.""" - self.erase_prefix("/") - - def __eq__(self, other): - return NotImplemented - - @staticmethod - def _ensure_store(store): - """ - We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``Store``, which is slightly - different than ``MutableMapping``. 
- - We'll do this conversion in a few places automatically - """ - from zarr._storage.v3 import KVStoreV3 # avoid circular import - - if store is None: - return None - elif isinstance(store, StoreV3): - return store - elif isinstance(store, Store): - raise ValueError(f"cannot initialize a v3 store with a v{store._store_version} store") - elif isinstance(store, MutableMapping): - return KVStoreV3(store) - else: - for attr in [ - "keys", - "values", - "get", - "__setitem__", - "__getitem__", - "__delitem__", - "__contains__", - ]: - if not hasattr(store, attr): - break - else: - return KVStoreV3(store) - - raise ValueError( - "v3 stores must be subclasses of StoreV3, " - "if your store exposes the MutableMapping interface wrap it in " - f"Zarr.storage.KVStoreV3. Got {store}" - ) - - -class StorageTransformer(MutableMapping, abc.ABC): - """Base class for storage transformers. The methods simply pass on the data as-is - and should be overwritten by sub-classes.""" - - _store_version = 3 - _metadata_class = Metadata3 - - def __init__(self, _type) -> None: - if _type not in self.valid_types: # pragma: no cover - raise ValueError( - f"Storage transformer cannot be initialized with type {_type}, " - + f"must be one of {list(self.valid_types)}." - ) - self.type = _type - self._inner_store = None - - def _copy_for_array(self, array, inner_store): - transformer_copy = copy(self) - transformer_copy._inner_store = inner_store - return transformer_copy - - @abc.abstractproperty - def extension_uri(self): - pass # pragma: no cover - - @abc.abstractproperty - def valid_types(self): - pass # pragma: no cover - - def get_config(self): - """Return a dictionary holding configuration parameters for this - storage transformer. All values must be compatible with JSON encoding.""" - # Override in sub-class if need special encoding of config values. - # By default, assume all non-private members are configuration - # parameters except for type . - return {k: v for k, v in self.__dict__.items() if not k.startswith("_") and k != "type"} - - @classmethod - def from_config(cls, _type, config): - """Instantiate storage transformer from a configuration object.""" - # override in sub-class if need special decoding of config values - - # by default, assume constructor accepts configuration parameters as - # keyword arguments without any special decoding - return cls(_type, **config) - - @property - def inner_store(self) -> Union["StorageTransformer", StoreV3]: - assert ( - self._inner_store is not None - ), "inner_store is not initialized, first get a copy via _copy_for_array." 
- return self._inner_store - - # The following implementations are usually fine to keep as-is: - - def __eq__(self, other): - return ( - type(self) is type(other) - and self._inner_store == other._inner_store - and self.get_config() == other.get_config() - ) - - def erase(self, key): - self.__delitem__(key) - - def list(self): - return list(self.keys()) - - def list_dir(self, prefix): - return StoreV3.list_dir(self, prefix) - - def is_readable(self): - return self.inner_store.is_readable() - - def is_writeable(self): - return self.inner_store.is_writeable() - - def is_listable(self): - return self.inner_store.is_listable() - - def is_erasable(self): - return self.inner_store.is_erasable() - - def clear(self): - return self.inner_store.clear() - - def __enter__(self): - return self.inner_store.__enter__() - - def __exit__(self, exc_type, exc_value, traceback): - return self.inner_store.__exit__(exc_type, exc_value, traceback) - - def close(self) -> None: - return self.inner_store.close() - - # The following implementations might need to be re-implemented - # by subclasses implementing storage transformers: - - def rename(self, src_path: str, dst_path: str) -> None: - return self.inner_store.rename(src_path, dst_path) - - def list_prefix(self, prefix): - return self.inner_store.list_prefix(prefix) - - def erase_prefix(self, prefix): - return self.inner_store.erase_prefix(prefix) - - def rmdir(self, path=None): - return self.inner_store.rmdir(path) - - def __contains__(self, key): - return self.inner_store.__contains__(key) - - def __setitem__(self, key, value): - return self.inner_store.__setitem__(key, value) - - def __getitem__(self, key): - return self.inner_store.__getitem__(key) - - def __delitem__(self, key): - return self.inner_store.__delitem__(key) - - def __iter__(self): - return self.inner_store.__iter__() - - def __len__(self): - return self.inner_store.__len__() - - @property - def supports_efficient_get_partial_values(self): - return self.inner_store.supports_efficient_get_partial_values - - def get_partial_values(self, key_ranges): - return self.inner_store.get_partial_values(key_ranges) - - def supports_efficient_set_partial_values(self): - return self.inner_store.supports_efficient_set_partial_values() - - def set_partial_values(self, key_start_values): - return self.inner_store.set_partial_values(key_start_values) - - -# allow MutableMapping for backwards compatibility -StoreLike = Union[BaseStore, MutableMapping] - - -def _path_to_prefix(path: Optional[str]) -> str: - # assume path already normalized - if path: - prefix = path + "/" - else: - prefix = "" - return prefix - - -def _get_hierarchy_metadata(store: StoreV3) -> Mapping[str, Any]: - version = getattr(store, "_store_version", 2) - if version < 3: - raise ValueError("zarr.json hierarchy metadata not stored for " f"zarr v{version} stores") - if "zarr.json" not in store: - raise ValueError("zarr.json metadata not found in store") - return store._metadata_class.decode_hierarchy_metadata(store["zarr.json"]) - - -def _get_metadata_suffix(store: StoreV3) -> str: - if "zarr.json" in store: - return _get_hierarchy_metadata(store)["metadata_key_suffix"] - return ".json" - - -def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: - """Rename source or group metadata file associated with src_path.""" - any_renamed = False - sfx = _get_metadata_suffix(store) - src_path = src_path.rstrip("/") - dst_path = dst_path.rstrip("/") - _src_array_json = meta_root + src_path + ".array" + sfx - if _src_array_json 
in store: - new_key = meta_root + dst_path + ".array" + sfx - store[new_key] = store.pop(_src_array_json) - any_renamed = True - _src_group_json = meta_root + src_path + ".group" + sfx - if _src_group_json in store: - new_key = meta_root + dst_path + ".group" + sfx - store[new_key] = store.pop(_src_group_json) - any_renamed = True - return any_renamed - - -def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: - # assume path already normalized - src_prefix = _path_to_prefix(src_path) - dst_prefix = _path_to_prefix(dst_path) - version = getattr(store, "_store_version", 2) - if version == 2: - for key in list(store.keys()): - if key.startswith(src_prefix): - new_key = dst_prefix + key.lstrip(src_prefix) - store[new_key] = store.pop(key) - else: - any_renamed = False - for root_prefix in [meta_root, data_root]: - _src_prefix = root_prefix + src_prefix - _dst_prefix = root_prefix + dst_prefix - for key in store.list_prefix(_src_prefix): # type: ignore - new_key = _dst_prefix + key[len(_src_prefix) :] - store[new_key] = store.pop(key) - any_renamed = True - any_meta_renamed = _rename_metadata_v3(store, src_path, dst_path) # type: ignore - any_renamed = any_meta_renamed or any_renamed - - if not any_renamed: - raise ValueError(f"no item {src_path} found to rename") - - -def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: - # assume path already normalized - prefix = _path_to_prefix(path) - for key in list(store.keys()): - if key.startswith(prefix): - del store[key] - - -def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: - meta_dir = meta_root + path - meta_dir = meta_dir.rstrip("/") - _rmdir_from_keys(store, meta_dir) - - # remove data folder - data_dir = data_root + path - data_dir = data_dir.rstrip("/") - _rmdir_from_keys(store, data_dir) - - # remove metadata files - sfx = _get_metadata_suffix(store) - array_meta_file = meta_dir + ".array" + sfx - if array_meta_file in store: - store.erase(array_meta_file) - group_meta_file = meta_dir + ".group" + sfx - if group_meta_file in store: - store.erase(group_meta_file) - - -def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: - # assume path already normalized - prefix = _path_to_prefix(path) - children = set() - for key in list(store.keys()): - if key.startswith(prefix) and len(key) > len(prefix): - suffix = key[len(prefix) :] - child = suffix.split("/")[0] - children.add(child) - return sorted(children) - - -def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: - if getattr(store, "_store_version", 2) == 3: - sfx = _get_metadata_suffix(store) # type: ignore - if prefix: - key = meta_root + prefix.rstrip("/") + ".array" + sfx - else: - key = meta_root[:-1] + ".array" + sfx - else: - key = prefix + array_meta_key - return key - - -def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: - if getattr(store, "_store_version", 2) == 3: - sfx = _get_metadata_suffix(store) # type: ignore - if prefix: - key = meta_root + prefix.rstrip("/") + ".group" + sfx - else: - key = meta_root[:-1] + ".group" + sfx - else: - key = prefix + group_meta_key - return key - - -def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: - if getattr(store, "_store_version", 2) == 3: - # for v3, attributes are stored in the array metadata - sfx = _get_metadata_suffix(store) # type: ignore - if prefix: - key = meta_root + prefix.rstrip("/") + ".array" + sfx - else: - key = meta_root[:-1] + ".array" + sfx - else: - key = prefix + attrs_key - return key diff --git 
a/zarr/_storage/v3.py b/zarr/_storage/v3.py deleted file mode 100644 index 4987f820cf..0000000000 --- a/zarr/_storage/v3.py +++ /dev/null @@ -1,628 +0,0 @@ -import os -import shutil -from collections import OrderedDict -from collections.abc import MutableMapping -from threading import Lock -from typing import Union, Dict, Any, Optional - -from zarr.errors import ( - MetadataError, - ReadOnlyError, -) -from zarr.util import buffer_size, json_loads, normalize_storage_path -from zarr.types import DIMENSION_SEPARATOR - -from zarr._storage.absstore import ABSStoreV3 # noqa: F401 -from zarr._storage.store import ( # noqa: F401 - _get_hierarchy_metadata, - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - BaseStore, - Store, - StoreV3, -) -from zarr.storage import ( - DBMStore, - ConsolidatedMetadataStore, - DirectoryStore, - FSStore, - KVStore, - LMDBStore, - LRUStoreCache, - MemoryStore, - MongoDBStore, - RedisStore, - SQLiteStore, - ZipStore, - _getsize, -) - -__doctest_requires__ = { - ("RedisStore", "RedisStore.*"): ["redis"], - ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], - ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], -} - - -try: - # noinspection PyUnresolvedReferences - from zarr.codecs import Blosc - - default_compressor = Blosc() -except ImportError: # pragma: no cover - from zarr.codecs import Zlib - - default_compressor = Zlib() - - -Path = Union[str, bytes, None] -# allow MutableMapping for backwards compatibility -StoreLike = Union[BaseStore, MutableMapping] - - -class RmdirV3: - """Mixin class that can be used to ensure override of any existing v2 rmdir class.""" - - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) # type: ignore - - -class KVStoreV3(RmdirV3, KVStore, StoreV3): - def list(self): - return list(self._mutable_mapping.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def __eq__(self, other): - return isinstance(other, KVStoreV3) and self._mutable_mapping == other._mutable_mapping - - -KVStoreV3.__doc__ = KVStore.__doc__ - - -def _get_files_and_dirs_from_path(store, path): - path = normalize_storage_path(path) - - files = [] - # add array metadata file if present - array_key = _prefix_to_array_key(store, path) - if array_key in store: - files.append(os.path.join(store.path, array_key)) - - # add group metadata file if present - group_key = _prefix_to_group_key(store, path) - if group_key in store: - files.append(os.path.join(store.path, group_key)) - - dirs = [] - # add array and group folders if present - for d in [data_root + path, meta_root + path]: - dir_path = os.path.join(store.path, d) - if os.path.exists(dir_path): - dirs.append(dir_path) - return files, dirs - - -class FSStoreV3(FSStore, StoreV3): - # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) - _META_KEYS = () - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def _default_key_separator(self): - if self.key_separator is None: - self.key_separator = "/" - - def list(self): - return list(self.keys()) - - def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip("/") - return key.lower() if self.normalize_keys else key - - def getsize(self, path=None): - size = 0 - if path 
is None or path == "": - # size of both the data and meta subdirs - dirs = [] - for d in ["data/root", "meta/root"]: - dir_path = os.path.join(self.path, d) - if os.path.exists(dir_path): - dirs.append(dir_path) - elif path in self: - # access individual element by full path - return buffer_size(self[path]) - else: - files, dirs = _get_files_and_dirs_from_path(self, path) - for file in files: - size += os.path.getsize(file) - for d in dirs: - size += self.fs.du(d, total=True, maxdepth=None) - return size - - def setitems(self, values): - if self.mode == "r": - raise ReadOnlyError() - values = {self._normalize_key(key): val for key, val in values.items()} - - # initialize the /data/root/... folder corresponding to the array! - # Note: zarr.tests.test_core_v3.TestArrayWithFSStoreV3PartialRead fails - # without this explicit creation of directories - subdirectories = set(os.path.dirname(v) for v in values.keys()) - for subdirectory in subdirectories: - data_dir = os.path.join(self.path, subdirectory) - if not self.fs.exists(data_dir): - self.fs.mkdir(data_dir) - - self.map.setitems(values) - - def rmdir(self, path=None): - if self.mode == "r": - raise ReadOnlyError() - if path: - for base in [meta_root, data_root]: - store_path = self.dir_path(base + path) - if self.fs.isdir(store_path): - self.fs.rm(store_path, recursive=True) - - # remove any associated metadata files - sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip("/") - array_meta_file = meta_dir + ".array" + sfx - self.pop(array_meta_file, None) - group_meta_file = meta_dir + ".group" + sfx - self.pop(group_meta_file, None) - else: - store_path = self.dir_path(path) - if self.fs.isdir(store_path): - self.fs.rm(store_path, recursive=True) - - @property - def supports_efficient_get_partial_values(self): - return True - - def get_partial_values(self, key_ranges): - """Get multiple partial values. - key_ranges can be an iterable of key, range pairs, - where a range specifies two integers range_start and range_length - as a tuple, (range_start, range_length). - range_length may be None to indicate to read until the end. - range_start may be negative to start reading range_start bytes - from the end of the file. - A key may occur multiple times with different ranges. - Inserts None for missing keys into the returned list.""" - results = [] - for key, (range_start, range_length) in key_ranges: - key = self._normalize_key(key) - path = self.dir_path(key) - try: - if range_start is None or range_length is None: - end = None - else: - end = range_start + range_length - result = self.fs.cat_file(path, start=range_start, end=end) - except self.map.missing_exceptions: - result = None - results.append(result) - return results - - -class MemoryStoreV3(MemoryStore, StoreV3): - def __init__( - self, root=None, cls=dict, dimension_separator: Optional[DIMENSION_SEPARATOR] = None - ): - if root is None: - self.root = cls() - else: - self.root = root - self.cls = cls - self.write_mutex = Lock() - self._dimension_separator = dimension_separator # TODO: modify for v3? 
- - def __eq__(self, other): - return ( - isinstance(other, MemoryStoreV3) and self.root == other.root and self.cls == other.cls - ) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def list(self): - return list(self.keys()) - - def getsize(self, path: Path = None): - return _getsize(self, path) - - def rename(self, src_path: Path, dst_path: Path): - src_path = normalize_storage_path(src_path) - dst_path = normalize_storage_path(dst_path) - - any_renamed = False - for base in [meta_root, data_root]: - if self.list_prefix(base + src_path): - src_parent, src_key = self._get_parent(base + src_path) - dst_parent, dst_key = self._require_parent(base + dst_path) - - if src_key in src_parent: - dst_parent[dst_key] = src_parent.pop(src_key) - - if base == meta_root: - # check for and move corresponding metadata - sfx = _get_metadata_suffix(self) - src_meta = src_key + ".array" + sfx - if src_meta in src_parent: - dst_meta = dst_key + ".array" + sfx - dst_parent[dst_meta] = src_parent.pop(src_meta) - src_meta = src_key + ".group" + sfx - if src_meta in src_parent: - dst_meta = dst_key + ".group" + sfx - dst_parent[dst_meta] = src_parent.pop(src_meta) - any_renamed = True - any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed - if not any_renamed: - raise ValueError(f"no item {src_path} found to rename") - - def rmdir(self, path: Path = None): - path = normalize_storage_path(path) - if path: - for base in [meta_root, data_root]: - try: - parent, key = self._get_parent(base + path) - value = parent[key] - except KeyError: - continue - else: - if isinstance(value, self.cls): - del parent[key] - - # remove any associated metadata files - sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip("/") - array_meta_file = meta_dir + ".array" + sfx - self.pop(array_meta_file, None) - group_meta_file = meta_dir + ".group" + sfx - self.pop(group_meta_file, None) - else: - # clear out root - self.root = self.cls() - - -MemoryStoreV3.__doc__ = MemoryStore.__doc__ - - -class DirectoryStoreV3(DirectoryStore, StoreV3): - def list(self): - return list(self.keys()) - - def __eq__(self, other): - return isinstance(other, DirectoryStoreV3) and self.path == other.path - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def getsize(self, path: Path = None): - return _getsize(self, path) - - def rename(self, src_path, dst_path, metadata_key_suffix=".json"): - store_src_path = normalize_storage_path(src_path) - store_dst_path = normalize_storage_path(dst_path) - - dir_path = self.path - any_existed = False - for root_prefix in ["meta", "data"]: - src_path = os.path.join(dir_path, root_prefix, "root", store_src_path) - if os.path.exists(src_path): - any_existed = True - dst_path = os.path.join(dir_path, root_prefix, "root", store_dst_path) - os.renames(src_path, dst_path) - - for suffix in [".array" + metadata_key_suffix, ".group" + metadata_key_suffix]: - src_meta = os.path.join(dir_path, "meta", "root", store_src_path + suffix) - if os.path.exists(src_meta): - any_existed = True - dst_meta = os.path.join(dir_path, "meta", "root", store_dst_path + suffix) - dst_dir = os.path.dirname(dst_meta) - if not os.path.exists(dst_dir): - os.makedirs(dst_dir) - os.rename(src_meta, dst_meta) - if not any_existed: - raise FileNotFoundError("nothing found at src_path") - - def rmdir(self, path=None): - store_path = normalize_storage_path(path) - dir_path = self.path - if store_path: - for base in 
[meta_root, data_root]: - dir_path = os.path.join(dir_path, base + store_path) - if os.path.isdir(dir_path): - shutil.rmtree(dir_path) - - # remove any associated metadata files - sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip("/") - array_meta_file = meta_dir + ".array" + sfx - self.pop(array_meta_file, None) - group_meta_file = meta_dir + ".group" + sfx - self.pop(group_meta_file, None) - - elif os.path.isdir(dir_path): - shutil.rmtree(dir_path) - - -DirectoryStoreV3.__doc__ = DirectoryStore.__doc__ - - -class ZipStoreV3(ZipStore, StoreV3): - def list(self): - return list(self.keys()) - - def __eq__(self, other): - return ( - isinstance(other, ZipStore) - and self.path == other.path - and self.compression == other.compression - and self.allowZip64 == other.allowZip64 - ) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def getsize(self, path=None): - path = normalize_storage_path(path) - with self.mutex: - children = self.list_prefix(data_root + path) - children += self.list_prefix(meta_root + path) - print(f"path={path}, children={children}") - if children: - size = 0 - for name in children: - info = self.zf.getinfo(name) - size += info.compress_size - return size - elif path in self: - info = self.zf.getinfo(path) - return info.compress_size - else: - return 0 - - -ZipStoreV3.__doc__ = ZipStore.__doc__ - - -class RedisStoreV3(RmdirV3, RedisStore, StoreV3): - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -RedisStoreV3.__doc__ = RedisStore.__doc__ - - -class MongoDBStoreV3(RmdirV3, MongoDBStore, StoreV3): - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -MongoDBStoreV3.__doc__ = MongoDBStore.__doc__ - - -class DBMStoreV3(RmdirV3, DBMStore, StoreV3): - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -DBMStoreV3.__doc__ = DBMStore.__doc__ - - -class LMDBStoreV3(RmdirV3, LMDBStore, StoreV3): - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -LMDBStoreV3.__doc__ = LMDBStore.__doc__ - - -class SQLiteStoreV3(SQLiteStore, StoreV3): - def list(self): - return list(self.keys()) - - def getsize(self, path=None): - # TODO: why does the query below not work in this case? - # For now fall back to the default _getsize implementation - # size = 0 - # for _path in [data_root + path, meta_root + path]: - # c = self.cursor.execute( - # ''' - # SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr - # WHERE k LIKE (? || "%") AND - # 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - # ''', - # (_path, _path) - # ) - # for item_size, in c: - # size += item_size - # return size - - # fallback to default implementation for now - return _getsize(self, path) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - def rmdir(self, path=None): - path = normalize_storage_path(path) - if path: - for base in [meta_root, data_root]: - with self.lock: - self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? 
|| "/%")', (base + path,)) - # remove any associated metadata files - sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip("/") - array_meta_file = meta_dir + ".array" + sfx - self.pop(array_meta_file, None) - group_meta_file = meta_dir + ".group" + sfx - self.pop(group_meta_file, None) - else: - self.clear() - - -SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ - - -class LRUStoreCacheV3(RmdirV3, LRUStoreCache, StoreV3): - def __init__(self, store, max_size: int): - self._store = StoreV3._ensure_store(store) - self._max_size = max_size - self._current_size = 0 - self._keys_cache = None - self._contains_cache = {} - self._listdir_cache: Dict[Path, Any] = dict() - self._values_cache: Dict[Path, Any] = OrderedDict() - self._mutex = Lock() - self.hits = self.misses = 0 - - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ - - -class ConsolidatedMetadataStoreV3(ConsolidatedMetadataStore, StoreV3): - """A layer over other storage, where the metadata has been consolidated into - a single key. - - The purpose of this class, is to be able to get all of the metadata for - a given array in a single read operation from the underlying storage. - See :func:`zarr.convenience.consolidate_metadata` for how to create this - single metadata key. - - This class loads from the one key, and stores the data in a dict, so that - accessing the keys no longer requires operations on the backend store. - - This class is read-only, and attempts to change the array metadata will - fail, but changing the data is possible. If the backend storage is changed - directly, then the metadata stored here could become obsolete, and - :func:`zarr.convenience.consolidate_metadata` should be called again and the class - re-invoked. The use case is for write once, read many times. - - .. note:: This is an experimental feature. - - Parameters - ---------- - store: Store - Containing the zarr array. - metadata_key: str - The target in the store where all of the metadata are stored. We - assume JSON encoding. 
- - See Also - -------- - zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated - - """ - - def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zmetadata"): - self.store = StoreV3._ensure_store(store) - - # retrieve consolidated metadata - meta = json_loads(self.store[metadata_key]) - - # check format of consolidated metadata - consolidated_format = meta.get("zarr_consolidated_format", None) - if consolidated_format != 1: - raise MetadataError( - f"unsupported zarr consolidated metadata format: {consolidated_format}" - ) - - # decode metadata - self.meta_store: Store = KVStoreV3(meta["metadata"]) - - def rmdir(self, key): - raise ReadOnlyError() - - -def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseStore: - # default to v2 store for backward compatibility - zarr_version = getattr(store, "_store_version", 3) - if zarr_version != 3: - raise ValueError("store must be a version 3 store") - if store is None: - store = KVStoreV3(dict()) - # add default zarr.json metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) - return store - if isinstance(store, os.PathLike): - store = os.fspath(store) - if FSStore._fsspec_installed(): - import fsspec - - if isinstance(store, fsspec.FSMap): - return FSStoreV3( - store.root, - fs=store.fs, - mode=mode, - check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {}), - ) - if isinstance(store, str): - if "://" in store or "::" in store: - store = FSStoreV3(store, mode=mode, **(storage_options or {})) - elif storage_options: - raise ValueError("storage_options passed with non-fsspec path") - elif store.endswith(".zip"): - store = ZipStoreV3(store, mode=mode) - elif store.endswith(".n5"): - raise NotImplementedError("N5Store not yet implemented for V3") - # return N5StoreV3(store) - else: - store = DirectoryStoreV3(store) - else: - store = StoreV3._ensure_store(store) - - if "zarr.json" not in store: - # add default zarr.json metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) - return store diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py deleted file mode 100644 index 00467d44f9..0000000000 --- a/zarr/_storage/v3_storage_transformers.py +++ /dev/null @@ -1,367 +0,0 @@ -import functools -import itertools -import os -from typing import NamedTuple, Tuple, Optional, Union, Iterator - -from numcodecs.compat import ensure_bytes -import numpy as np - -from zarr._storage.store import StorageTransformer, StoreV3, _rmdir_from_keys_v3 -from zarr.util import normalize_storage_path -from zarr.types import DIMENSION_SEPARATOR - - -MAX_UINT_64 = 2**64 - 1 - - -v3_sharding_available = os.environ.get("ZARR_V3_SHARDING", "0").lower() not in ["0", "false"] - - -def assert_zarr_v3_sharding_available(): - if not v3_sharding_available: - raise NotImplementedError( - "Using V3 sharding is experimental and not yet finalized! 
To enable support, set:\n" - "ZARR_V3_SHARDING=1" - ) # pragma: no cover - - -class _ShardIndex(NamedTuple): - store: "ShardingStorageTransformer" - # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2) - offsets_and_lengths: np.ndarray - - def __localize_chunk__(self, chunk: Tuple[int, ...]) -> Tuple[int, ...]: - return tuple( - chunk_i % shard_i for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard) - ) - - def is_all_empty(self) -> bool: - return np.array_equiv(self.offsets_and_lengths, MAX_UINT_64) - - def get_chunk_slice(self, chunk: Tuple[int, ...]) -> Optional[slice]: - localized_chunk = self.__localize_chunk__(chunk) - chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk] - if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64): - return None - else: - return slice(int(chunk_start), int(chunk_start + chunk_len)) - - def set_chunk_slice(self, chunk: Tuple[int, ...], chunk_slice: Optional[slice]) -> None: - localized_chunk = self.__localize_chunk__(chunk) - if chunk_slice is None: - self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) - else: - self.offsets_and_lengths[localized_chunk] = ( - chunk_slice.start, - chunk_slice.stop - chunk_slice.start, - ) - - def to_bytes(self) -> bytes: - return self.offsets_and_lengths.tobytes(order="C") - - @classmethod - def from_bytes( - cls, buffer: Union[bytes, bytearray], store: "ShardingStorageTransformer" - ) -> "_ShardIndex": - try: - return cls( - store=store, - offsets_and_lengths=np.frombuffer(bytearray(buffer), dtype=" None: - assert_zarr_v3_sharding_available() - super().__init__(_type) - if isinstance(chunks_per_shard, int): - chunks_per_shard = (chunks_per_shard,) - else: - chunks_per_shard = tuple(int(i) for i in chunks_per_shard) - if chunks_per_shard == (): - chunks_per_shard = (1,) - self.chunks_per_shard = chunks_per_shard - self._num_chunks_per_shard = functools.reduce(lambda x, y: x * y, chunks_per_shard, 1) - self._dimension_separator = None - self._data_key_prefix = None - - def _copy_for_array(self, array, inner_store): - transformer_copy = super()._copy_for_array(array, inner_store) - transformer_copy._dimension_separator = array._dimension_separator - transformer_copy._data_key_prefix = array._data_key_prefix - if len(array._shape) > len(self.chunks_per_shard): - # The array shape might be longer when initialized with subdtypes. - # subdtypes dimensions come last, therefore padding chunks_per_shard - # with ones, effectively disabling sharding on the unlisted dimensions. - transformer_copy.chunks_per_shard += (1,) * ( - len(array._shape) - len(self.chunks_per_shard) - ) - return transformer_copy - - @property - def dimension_separator(self) -> DIMENSION_SEPARATOR: - assert ( - self._dimension_separator is not None - ), "dimension_separator is not initialized, first get a copy via _copy_for_array." - return self._dimension_separator - - def _is_data_key(self, key: str) -> bool: - assert ( - self._data_key_prefix is not None - ), "data_key_prefix is not initialized, first get a copy via _copy_for_array." 
- return key.startswith(self._data_key_prefix) - - def _key_to_shard(self, chunk_key: str) -> Tuple[str, Tuple[int, ...]]: - prefix, _, chunk_string = chunk_key.rpartition("c") - chunk_subkeys = ( - tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) - ) - shard_key_tuple = ( - subkey // shard_i for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) - ) - shard_key = prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) - return shard_key, chunk_subkeys - - def _get_index_from_store(self, shard_key: str) -> _ShardIndex: - # At the end of each shard 2*64bit per chunk for offset and length define the index: - index_bytes = self.inner_store.get_partial_values( - [(shard_key, (-16 * self._num_chunks_per_shard, None))] - )[0] - if index_bytes is None: - raise KeyError(shard_key) - return _ShardIndex.from_bytes( - index_bytes, - self, - ) - - def _get_index_from_buffer(self, buffer: Union[bytes, bytearray]) -> _ShardIndex: - # At the end of each shard 2*64bit per chunk for offset and length define the index: - return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard :], self) - - def _get_chunks_in_shard(self, shard_key: str) -> Iterator[Tuple[int, ...]]: - _, _, chunk_string = shard_key.rpartition("c") - shard_key_tuple = ( - tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) - ) - for chunk_offset in itertools.product(*(range(i) for i in self.chunks_per_shard)): - yield tuple( - shard_key_i * shards_i + offset_i - for shard_key_i, offset_i, shards_i in zip( - shard_key_tuple, chunk_offset, self.chunks_per_shard - ) - ) - - def __getitem__(self, key): - if self._is_data_key(key): - if self.supports_efficient_get_partial_values: - # Use the partial implementation, which fetches the index separately - value = self.get_partial_values([(key, (0, None))])[0] - if value is None: - raise KeyError(key) - else: - return value - shard_key, chunk_subkey = self._key_to_shard(key) - try: - full_shard_value = self.inner_store[shard_key] - except KeyError as e: - raise KeyError(key) from e - index = self._get_index_from_buffer(full_shard_value) - chunk_slice = index.get_chunk_slice(chunk_subkey) - if chunk_slice is not None: - return full_shard_value[chunk_slice] - else: - raise KeyError(key) - else: - return self.inner_store.__getitem__(key) - - def __setitem__(self, key, value): - value = ensure_bytes(value) - if self._is_data_key(key): - shard_key, chunk_subkey = self._key_to_shard(key) - chunks_to_read = set(self._get_chunks_in_shard(shard_key)) - chunks_to_read.remove(chunk_subkey) - new_content = {chunk_subkey: value} - try: - if self.supports_efficient_get_partial_values: - index = self._get_index_from_store(shard_key) - full_shard_value = None - else: - full_shard_value = self.inner_store[shard_key] - index = self._get_index_from_buffer(full_shard_value) - except KeyError: - index = _ShardIndex.create_empty(self) - else: - chunk_slices = [ - (chunk_to_read, index.get_chunk_slice(chunk_to_read)) - for chunk_to_read in chunks_to_read - ] - valid_chunk_slices = [ - (chunk_to_read, chunk_slice) - for chunk_to_read, chunk_slice in chunk_slices - if chunk_slice is not None - ] - # use get_partial_values if less than half of the available chunks must be read: - # (This can be changed when set_partial_values can be used efficiently.) 
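# Aside - an illustrative sketch, not part of the removed module: decoding the
# shard footer index described in the comments above. The index stores two
# uint64 values (offset, length) per chunk and is appended to the shard, so it
# occupies the final 16 * num_chunks bytes. The shard layout and payload below
# are assumptions made for illustration.
import numpy as np

chunks_per_shard = (2, 2)          # hypothetical shard layout
num_chunks = 2 * 2
MAX_UINT_64 = 2**64 - 1

# synthetic shard: arbitrary payload plus an "all chunks empty" footer
footer = np.full((num_chunks, 2), MAX_UINT_64, dtype="<u8").tobytes()
shard = b"chunk-bytes" + footer

# mirrors _get_index_from_buffer: slice off the footer and reshape it
offsets_and_lengths = np.frombuffer(shard[-16 * num_chunks:], dtype="<u8").reshape(
    *chunks_per_shard, 2
)
assert np.array_equiv(offsets_and_lengths, MAX_UINT_64)   # every slot marked missing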
- use_partial_get = ( - self.supports_efficient_get_partial_values - and len(valid_chunk_slices) < len(chunk_slices) / 2 - ) - - if use_partial_get: - chunk_values = self.inner_store.get_partial_values( - [ - ( - shard_key, - ( - chunk_slice.start, - chunk_slice.stop - chunk_slice.start, - ), - ) - for _, chunk_slice in valid_chunk_slices - ] - ) - for chunk_value, (chunk_to_read, _) in zip(chunk_values, valid_chunk_slices): - new_content[chunk_to_read] = chunk_value - else: - if full_shard_value is None: - full_shard_value = self.inner_store[shard_key] - for chunk_to_read, chunk_slice in valid_chunk_slices: - if chunk_slice is not None: - new_content[chunk_to_read] = full_shard_value[chunk_slice] - - shard_content = b"" - for chunk_subkey, chunk_content in new_content.items(): - chunk_slice = slice(len(shard_content), len(shard_content) + len(chunk_content)) - index.set_chunk_slice(chunk_subkey, chunk_slice) - shard_content += chunk_content - # Appending the index at the end of the shard: - shard_content += index.to_bytes() - self.inner_store[shard_key] = shard_content - else: # pragma: no cover - self.inner_store[key] = value - - def __delitem__(self, key): - if self._is_data_key(key): - shard_key, chunk_subkey = self._key_to_shard(key) - try: - index = self._get_index_from_store(shard_key) - except KeyError as e: - raise KeyError(key) from e - - index.set_chunk_slice(chunk_subkey, None) - - if index.is_all_empty(): - del self.inner_store[shard_key] - else: - index_bytes = index.to_bytes() - self.inner_store.set_partial_values([(shard_key, -len(index_bytes), index_bytes)]) - else: # pragma: no cover - del self.inner_store[key] - - def _shard_key_to_original_keys(self, key: str) -> Iterator[str]: - if self._is_data_key(key): - index = self._get_index_from_store(key) - prefix, _, _ = key.rpartition("c") - for chunk_tuple in self._get_chunks_in_shard(key): - if index.get_chunk_slice(chunk_tuple) is not None: - yield prefix + "c" + self.dimension_separator.join(map(str, chunk_tuple)) - else: - yield key - - def __iter__(self) -> Iterator[str]: - for key in self.inner_store: - yield from self._shard_key_to_original_keys(key) - - def __len__(self): - return sum(1 for _ in self.keys()) - - def get_partial_values(self, key_ranges): - if self.supports_efficient_get_partial_values: - transformed_key_ranges = [] - cached_indices = {} - none_indices = [] - for i, (key, range_) in enumerate(key_ranges): - if self._is_data_key(key): - shard_key, chunk_subkey = self._key_to_shard(key) - try: - index = cached_indices[shard_key] - except KeyError: - try: - index = self._get_index_from_store(shard_key) - except KeyError: - none_indices.append(i) - continue - cached_indices[shard_key] = index - chunk_slice = index.get_chunk_slice(chunk_subkey) - if chunk_slice is None: - none_indices.append(i) - continue - range_start, range_length = range_ - if range_length is None: - range_length = chunk_slice.stop - chunk_slice.start - transformed_key_ranges.append( - (shard_key, (range_start + chunk_slice.start, range_length)) - ) - else: # pragma: no cover - transformed_key_ranges.append((key, range_)) - values = self.inner_store.get_partial_values(transformed_key_ranges) - for i in none_indices: - values.insert(i, None) - return values - else: - return StoreV3.get_partial_values(self, key_ranges) - - def supports_efficient_set_partial_values(self): - return False - - def set_partial_values(self, key_start_values): - # This does not yet implement efficient set_partial_values - StoreV3.set_partial_values(self, 
key_start_values) - - def rename(self, src_path: str, dst_path: str) -> None: - StoreV3.rename(self, src_path, dst_path) # type: ignore[arg-type] - - def list_prefix(self, prefix): - return StoreV3.list_prefix(self, prefix) - - def erase_prefix(self, prefix): - if self._is_data_key(prefix): - StoreV3.erase_prefix(self, prefix) - else: - self.inner_store.erase_prefix(prefix) - - def rmdir(self, path=None): - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - - def __contains__(self, key): - if self._is_data_key(key): - shard_key, chunk_subkeys = self._key_to_shard(key) - try: - index = self._get_index_from_store(shard_key) - except KeyError: - return False - chunk_slice = index.get_chunk_slice(chunk_subkeys) - return chunk_slice is not None - else: - return self._inner_store.__contains__(key) diff --git a/zarr/attrs.py b/zarr/attrs.py deleted file mode 100644 index af9a5f1d30..0000000000 --- a/zarr/attrs.py +++ /dev/null @@ -1,201 +0,0 @@ -import warnings -from collections.abc import MutableMapping - -from zarr._storage.store import Store, StoreV3 -from zarr.util import json_dumps - - -class Attributes(MutableMapping): - """Class providing access to user attributes on an array or group. Should not be - instantiated directly, will be available via the `.attrs` property of an array or - group. - - Parameters - ---------- - store : MutableMapping - The store in which to store the attributes. - key : str, optional - The key under which the attributes will be stored. - read_only : bool, optional - If True, attributes cannot be modified. - cache : bool, optional - If True (default), attributes will be cached locally. - synchronizer : Synchronizer - Only necessary if attributes may be modified from multiple threads or processes. - - """ - - def __init__( - self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None, cached_dict=None - ): - self._version = getattr(store, "_store_version", 2) - _Store = Store if self._version == 2 else StoreV3 - self.store = _Store._ensure_store(store) - self.key = key - self.read_only = read_only - self.cache = cache - self._cached_asdict = cached_dict if cache else None - self.synchronizer = synchronizer - - def _get_nosync(self): - try: - data = self.store[self.key] - except KeyError: - d = dict() - if self._version > 2: - d["attributes"] = {} - else: - d = self.store._metadata_class.parse_metadata(data) - return d - - def asdict(self): - """Retrieve all attributes as a dictionary.""" - if self.cache and self._cached_asdict is not None: - return self._cached_asdict - d = self._get_nosync() - if self._version == 3: - d = d["attributes"] - if self.cache: - self._cached_asdict = d - return d - - def refresh(self): - """Refresh cached attributes from the store.""" - if self.cache: - if self._version == 2: - self._cached_asdict = self._get_nosync() - else: - self._cached_asdict = self._get_nosync()["attributes"] - - def __contains__(self, x): - return x in self.asdict() - - def __getitem__(self, item): - return self.asdict()[item] - - def _write_op(self, f, *args, **kwargs): - # guard condition - if self.read_only: - raise PermissionError("attributes are read-only") - - # synchronization - if self.synchronizer is None: - return f(*args, **kwargs) - else: - with self.synchronizer[self.key]: - return f(*args, **kwargs) - - def __setitem__(self, item, value): - self._write_op(self._setitem_nosync, item, value) - - def _setitem_nosync(self, item, value): - # load existing data - d = self._get_nosync() - - # set key value - if 
self._version == 2: - d[item] = value - else: - d["attributes"][item] = value - - # _put modified data - self._put_nosync(d) - - def __delitem__(self, item): - self._write_op(self._delitem_nosync, item) - - def _delitem_nosync(self, key): - # load existing data - d = self._get_nosync() - - # delete key value - if self._version == 2: - del d[key] - else: - del d["attributes"][key] - - # _put modified data - self._put_nosync(d) - - def put(self, d): - """Overwrite all attributes with the key/value pairs in the provided dictionary - `d` in a single operation.""" - if self._version == 2: - self._write_op(self._put_nosync, d) - else: - self._write_op(self._put_nosync, dict(attributes=d)) - - def _put_nosync(self, d): - d_to_check = d if self._version == 2 else d["attributes"] - if not all(isinstance(item, str) for item in d_to_check): - # TODO: Raise an error for non-string keys - # raise TypeError("attribute keys must be strings") - warnings.warn( - "only attribute keys of type 'string' will be allowed in the future", - DeprecationWarning, - stacklevel=2, - ) - - try: - d_to_check = {str(k): v for k, v in d_to_check.items()} - except TypeError as ex: # pragma: no cover - raise TypeError("attribute keys can not be stringified") from ex - - if self._version == 2: - d = d_to_check - else: - d["attributes"] = d_to_check - - if self._version == 2: - self.store[self.key] = json_dumps(d) - if self.cache: - self._cached_asdict = d - else: - try: - meta_unparsed = self.store[self.key] - # Cannot write the attributes directly to JSON, but have to - # store it within the pre-existing attributes key of the v3 - # metadata. - - # Note: this changes the store.counter result in test_caching_on! - - meta = self.store._metadata_class.parse_metadata(meta_unparsed) - if "attributes" in meta and "filters" in meta["attributes"]: - # need to preserve any existing "filters" attribute - d["attributes"]["filters"] = meta["attributes"]["filters"] - meta["attributes"] = d["attributes"] - except KeyError: - meta = d - self.store[self.key] = json_dumps(meta) - if self.cache: - self._cached_asdict = d["attributes"] - - # noinspection PyMethodOverriding - def update(self, *args, **kwargs): - """Update the values of several attributes in a single operation.""" - self._write_op(self._update_nosync, *args, **kwargs) - - def _update_nosync(self, *args, **kwargs): - # load existing data - d = self._get_nosync() - - # update - if self._version == 2: - d.update(*args, **kwargs) - else: - d["attributes"].update(*args, **kwargs) - - # _put modified data - self._put_nosync(d) - - def keys(self): - return self.asdict().keys() - - def __iter__(self): - return iter(self.asdict()) - - def __len__(self): - return len(self.asdict()) - - def _ipython_key_completions_(self): - return sorted(self) diff --git a/zarr/codecs.py b/zarr/codecs.py deleted file mode 100644 index 6fd5e20401..0000000000 --- a/zarr/codecs.py +++ /dev/null @@ -1,4 +0,0 @@ -# flake8: noqa -from numcodecs import * -from numcodecs import get_codec, Blosc, Pickle, Zlib, Zstd, Delta, AsType, BZ2 -from numcodecs.registry import codec_registry diff --git a/zarr/context.py b/zarr/context.py deleted file mode 100644 index 3dd7dda4ac..0000000000 --- a/zarr/context.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import TypedDict - -from numcodecs.compat import NDArrayLike - - -class Context(TypedDict, total=False): - """A context for component specific information - - All keys are optional. 
Any component reading the context must provide - a default implementation in the case a key cannot be found. - - Items - ----- - meta_array : array-like, optional - An array-like instance to use for determining the preferred output - array type. - """ - - meta_array: NDArrayLike diff --git a/zarr/convenience.py b/zarr/convenience.py deleted file mode 100644 index bd284e0844..0000000000 --- a/zarr/convenience.py +++ /dev/null @@ -1,1364 +0,0 @@ -"""Convenience functions for storing and loading data.""" - -import itertools -import os -import re -from collections.abc import Mapping, MutableMapping - -from zarr._storage.store import data_root, meta_root, assert_zarr_v3_api_available -from zarr.core import Array -from zarr.creation import array as _create_array -from zarr.creation import open_array -from zarr.errors import CopyError, PathNotFoundError -from zarr.hierarchy import Group -from zarr.hierarchy import group as _create_group -from zarr.hierarchy import open_group -from zarr.meta import json_dumps, json_loads -from zarr.storage import ( - _get_metadata_suffix, - contains_array, - contains_group, - normalize_store_arg, - BaseStore, - ConsolidatedMetadataStore, -) -from zarr._storage.v3 import ConsolidatedMetadataStoreV3 -from zarr.util import TreeViewer, buffer_size, normalize_storage_path - -from typing import Union - -StoreLike = Union[BaseStore, MutableMapping, str, None] - -_builtin_open = open # builtin open is later shadowed by a local open function - - -def _check_and_update_path(store: BaseStore, path): - if getattr(store, "_store_version", 2) > 2 and not path: - raise ValueError("path must be provided for v3 stores") - return normalize_storage_path(path) - - -# noinspection PyShadowingBuiltins -def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=None, **kwargs): - """Convenience function to open a group or array using file-mode-like semantics. - - Parameters - ---------- - store : Store or string, optional - Store or path to directory in file system or name of zip file. - mode : {'r', 'r+', 'a', 'w', 'w-'}, optional - Persistence mode: 'r' means read only (must exist); 'r+' means - read/write (must exist); 'a' means read/write (create if doesn't - exist); 'w' means create (overwrite if exists); 'w-' means create - (fail if exists). - zarr_version : {2, 3, None}, optional - The zarr protocol version to use. The default value of None will attempt - to infer the version from `store` if possible, otherwise it will fall - back to 2. - - .. warning:: `zarr_version=3` is currently using the experimental Zarr V3 - implementation. This implementation is not in sync with the final specification - and will be replaced with a spec compliant version in the version 3.0. - - path : str or None, optional - The path within the store to open. - **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. - - Returns - ------- - z : :class:`zarr.core.Array` or :class:`zarr.hierarchy.Group` - Array or group, depending on what exists in the given store. 
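As an aside, here is a minimal, self-contained sketch of how a component was expected to consume the Context mapping removed a few lines above: every key is optional, so readers fall back to a default when one is absent. The Context class below is a local stand-in mirroring the removed definition, and the function name is hypothetical.

from typing import TypedDict

import numpy as np


class Context(TypedDict, total=False):   # local stand-in for the removed zarr.context.Context
    meta_array: np.ndarray


def preferred_output_array(context: Context) -> np.ndarray:
    # all keys are optional, so a reader must supply its own default
    return context.get("meta_array", np.empty((0,)))


print(preferred_output_array(Context(meta_array=np.zeros(3))))   # uses the provided array
print(preferred_output_array(Context()))                         # falls back to the default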
- - See Also - -------- - zarr.creation.open_array, zarr.hierarchy.open_group - - Examples - -------- - - Storing data in a directory 'data/example.zarr' on the local file system:: - - >>> import zarr - >>> store = 'data/example.zarr' - >>> zw = zarr.open(store, mode='w', shape=100, dtype='i4') # open new array - >>> zw - - >>> za = zarr.open(store, mode='a') # open existing array for reading and writing - >>> za - - >>> zr = zarr.open(store, mode='r') # open existing array read-only - >>> zr - - >>> gw = zarr.open(store, mode='w') # open new group, overwriting previous data - >>> gw - - >>> ga = zarr.open(store, mode='a') # open existing group for reading and writing - >>> ga - - >>> gr = zarr.open(store, mode='r') # open existing group read-only - >>> gr - - - """ - - # handle polymorphic store arg - # we pass storage options explicitly, since normalize_store_arg might construct - # a store if the input is a fsspec-compatible URL - _store: BaseStore = normalize_store_arg( - store, - storage_options=kwargs.pop("storage_options", {}), - mode=mode, - zarr_version=zarr_version, - ) - # path = _check_and_update_path(_store, path) - path = normalize_storage_path(path) - kwargs["path"] = path - - if mode in {"w", "w-", "x"}: - if "shape" in kwargs: - return open_array(_store, mode=mode, **kwargs) - else: - return open_group(_store, mode=mode, **kwargs) - - elif mode == "a": - if "shape" in kwargs or contains_array(_store, path): - return open_array(_store, mode=mode, **kwargs) - else: - return open_group(_store, mode=mode, **kwargs) - - else: - if contains_array(_store, path): - return open_array(_store, mode=mode, **kwargs) - elif contains_group(_store, path): - return open_group(_store, mode=mode, **kwargs) - else: - raise PathNotFoundError(path) - - -def _might_close(path): - return isinstance(path, (str, os.PathLike)) - - -def save_array(store: StoreLike, arr, *, zarr_version=None, path=None, **kwargs): - """Convenience function to save a NumPy array to the local file system, following a - similar API to the NumPy save() function. - - Parameters - ---------- - store : MutableMapping or string - Store or path to directory in file system or name of zip file. - arr : ndarray - NumPy array with data to save. - zarr_version : {2, 3, None}, optional - The zarr protocol version to use when saving. The default value of None - will attempt to infer the version from `store` if possible, otherwise - it will fall back to 2. - - .. warning:: `zarr_version=3` is currently using the experimental Zarr V3 - implementation. This implementation is not in sync with the final specification - and will be replaced with a spec compliant version in the version 3.0. - - path : str or None, optional - The path within the store where the array will be saved. - kwargs - Passed through to :func:`create`, e.g., compressor. 
- - Examples - -------- - Save an array to a directory on the file system (uses a :class:`DirectoryStore`):: - - >>> import zarr - >>> import numpy as np - >>> arr = np.arange(10000) - >>> zarr.save_array('data/example.zarr', arr) - >>> zarr.load('data/example.zarr') - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - - Save an array to a single file (uses a :class:`ZipStore`):: - - >>> zarr.save_array('data/example.zip', arr) - >>> zarr.load('data/example.zip') - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - - """ - may_need_closing = _might_close(store) - _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) - path = _check_and_update_path(_store, path) - try: - _create_array( - arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, **kwargs - ) - finally: - if may_need_closing: - # needed to ensure zip file records are written - _store.close() - - -def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): - """Convenience function to save several NumPy arrays to the local file system, following a - similar API to the NumPy savez()/savez_compressed() functions. - - Parameters - ---------- - store : MutableMapping or string - Store or path to directory in file system or name of zip file. - args : ndarray - NumPy arrays with data to save. - zarr_version : {2, 3, None}, optional - The zarr protocol version to use when saving. The default value of None - will attempt to infer the version from `store` if possible, otherwise - it will fall back to 2. - - .. warning:: `zarr_version=3` is currently using the experimental Zarr V3 - implementation. This implementation is not in sync with the final specification - and will be replaced with a spec compliant version in the version 3.0. - - path : str or None, optional - Path within the store where the group will be saved. - kwargs - NumPy arrays with data to save. - - Examples - -------- - Save several arrays to a directory on the file system (uses a - :class:`DirectoryStore`): - - >>> import zarr - >>> import numpy as np - >>> a1 = np.arange(10000) - >>> a2 = np.arange(10000, 0, -1) - >>> zarr.save_group('data/example.zarr', a1, a2) - >>> loader = zarr.load('data/example.zarr') - >>> loader - - >>> loader['arr_0'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['arr_1'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - Save several arrays using named keyword arguments:: - - >>> zarr.save_group('data/example.zarr', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zarr') - >>> loader - - >>> loader['foo'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['bar'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - Store several arrays in a single zip file (uses a :class:`ZipStore`):: - - >>> zarr.save_group('data/example.zip', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zip') - >>> loader - - >>> loader['foo'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['bar'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - Notes - ----- - Default compression options will be used. 
- - """ - if len(args) == 0 and len(kwargs) == 0: - raise ValueError("at least one array must be provided") - # handle polymorphic store arg - may_need_closing = _might_close(store) - _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) - path = _check_and_update_path(_store, path) - try: - grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) - for i, arr in enumerate(args): - k = f"arr_{i}" - grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) - for k, arr in kwargs.items(): - grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) - finally: - if may_need_closing: - # needed to ensure zip file records are written - _store.close() - - -def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): - """Convenience function to save an array or group of arrays to the local file system. - - Parameters - ---------- - store : MutableMapping or string - Store or path to directory in file system or name of zip file. - args : ndarray - NumPy arrays with data to save. - zarr_version : {2, 3, None}, optional - The zarr protocol version to use when saving. The default value of None - will attempt to infer the version from `store` if possible, otherwise - it will fall back to 2. - - .. warning:: `zarr_version=3` is currently using the experimental Zarr V3 - implementation. This implementation is not in sync with the final specification - and will be replaced with a spec compliant version in the version 3.0. - - path : str or None, optional - The path within the group where the arrays will be saved. - kwargs - NumPy arrays with data to save. - - Examples - -------- - Save an array to a directory on the file system (uses a :class:`DirectoryStore`):: - - >>> import zarr - >>> import numpy as np - >>> arr = np.arange(10000) - >>> zarr.save('data/example.zarr', arr) - >>> zarr.load('data/example.zarr') - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - - Save an array to a Zip file (uses a :class:`ZipStore`):: - - >>> zarr.save('data/example.zip', arr) - >>> zarr.load('data/example.zip') - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - - Save several arrays to a directory on the file system (uses a - :class:`DirectoryStore` and stores arrays in a group):: - - >>> import zarr - >>> import numpy as np - >>> a1 = np.arange(10000) - >>> a2 = np.arange(10000, 0, -1) - >>> zarr.save('data/example.zarr', a1, a2) - >>> loader = zarr.load('data/example.zarr') - >>> loader - - >>> loader['arr_0'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['arr_1'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - Save several arrays using named keyword arguments:: - - >>> zarr.save('data/example.zarr', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zarr') - >>> loader - - >>> loader['foo'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['bar'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - Store several arrays in a single zip file (uses a :class:`ZipStore`):: - - >>> zarr.save('data/example.zip', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zip') - >>> loader - - >>> loader['foo'] - array([ 0, 1, 2, ..., 9997, 9998, 9999]) - >>> loader['bar'] - array([10000, 9999, 9998, ..., 3, 2, 1]) - - See Also - -------- - save_array, save_group - - """ - if len(args) == 0 and len(kwargs) == 0: - raise ValueError("at least one array must be provided") - if len(args) == 1 and len(kwargs) == 0: - save_array(store, args[0], zarr_version=zarr_version, path=path) - else: - save_group(store, *args, 
zarr_version=zarr_version, path=path, **kwargs) - - -class LazyLoader(Mapping): - def __init__(self, grp): - self.grp = grp - self.cache = dict() - - def __getitem__(self, item): - try: - return self.cache[item] - except KeyError: - arr = self.grp[item][...] - self.cache[item] = arr - return arr - - def __len__(self): - return len(self.grp) - - def __iter__(self): - return iter(self.grp) - - def __contains__(self, item): - return item in self.grp - - def __repr__(self): - r = ">> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g3.create_group('baz') - >>> g5 = g3.create_group('qux') - >>> d1 = g5.create_dataset('baz', shape=100, chunks=10) - >>> g1.tree() - / - ├── bar - │ ├── baz - │ └── qux - │ └── baz (100,) float64 - └── foo - >>> import h5py - >>> h5f = h5py.File('data/example.h5', mode='w') - >>> zarr.copy_all(g1, h5f) - (5, 0, 800) - >>> zarr.tree(h5f) - / - ├── bar - │ ├── baz - │ └── qux - │ └── baz (100,) float64 - └── foo - - See Also - -------- - zarr.hierarchy.Group.tree - - Notes - ----- - Please note that this is an experimental feature. The behaviour of this - function is still evolving and the default output and/or parameters may change - in future versions. - - """ - - return TreeViewer(grp, expand=expand, level=level) - - -class _LogWriter: - def __init__(self, log): - self.log_func = None - self.log_file = None - self.needs_closing = False - if log is None: - # don't do any logging - pass - elif callable(log): - self.log_func = log - elif isinstance(log, str): - self.log_file = _builtin_open(log, mode="w") - self.needs_closing = True - elif hasattr(log, "write"): - self.log_file = log - else: - raise TypeError( - f"log must be a callable function, file path or file-like object, found {log!r}" - ) - - def __enter__(self): - return self - - def __exit__(self, *args): - if self.log_file is not None and self.needs_closing: - self.log_file.close() - - def __call__(self, *args, **kwargs): - if self.log_file is not None: - kwargs["file"] = self.log_file - print(*args, **kwargs) - if hasattr(self.log_file, "flush"): - # get immediate feedback - self.log_file.flush() - elif self.log_func is not None: - self.log_func(*args, **kwargs) - - -def _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied): - # log a final message with a summary of what happened - if dry_run: - message = "dry run: " - else: - message = "all done: " - message += f"{n_copied:,} copied, {n_skipped:,} skipped" - if not dry_run: - message += f", {n_bytes_copied:,} bytes copied" - log(message) - - -def copy_store( - source, - dest, - source_path="", - dest_path="", - excludes=None, - includes=None, - flags=0, - if_exists="raise", - dry_run=False, - log=None, -): - """Copy data directly from the `source` store to the `dest` store. Use this - function when you want to copy a group or array in the most efficient way, - preserving all configuration and attributes. This function is more efficient - than the copy() or copy_all() functions because it avoids de-compressing and - re-compressing data, rather the compressed chunk data for each array are - copied directly between stores. - - Parameters - ---------- - source : Mapping - Store to copy data from. - dest : MutableMapping - Store to copy data into. - source_path : str, optional - Only copy data from under this path in the source store. - dest_path : str, optional - Copy data into this path in the destination store. 
- excludes : sequence of str, optional - One or more regular expressions which will be matched against keys in - the source store. Any matching key will not be copied. - includes : sequence of str, optional - One or more regular expressions which will be matched against keys in - the source store and will override any excludes also matching. - flags : int, optional - Regular expression flags used for matching excludes and includes. - if_exists : {'raise', 'replace', 'skip'}, optional - How to handle keys that already exist in the destination store. If - 'raise' then a CopyError is raised on the first key already present - in the destination store. If 'replace' then any data will be replaced in - the destination. If 'skip' then any existing keys will not be copied. - dry_run : bool, optional - If True, don't actually copy anything, just log what would have - happened. - log : callable, file path or file-like object, optional - If provided, will be used to log progress information. - - Returns - ------- - n_copied : int - Number of items copied. - n_skipped : int - Number of items skipped. - n_bytes_copied : int - Number of bytes of data that were actually copied. - - Examples - -------- - - >>> import zarr - >>> store1 = zarr.DirectoryStore('data/example.zarr') - >>> root = zarr.group(store1, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.create_group('bar') - >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8') - >>> import numpy as np - >>> baz[:] = np.arange(100) - >>> root.tree() - / - └── foo - └── bar - └── baz (100,) int64 - >>> from sys import stdout - >>> store2 = zarr.ZipStore('data/example.zip', mode='w') - >>> zarr.copy_store(store1, store2, log=stdout) - copy .zgroup - copy foo/.zgroup - copy foo/bar/.zgroup - copy foo/bar/baz/.zarray - copy foo/bar/baz/0 - copy foo/bar/baz/1 - all done: 6 copied, 0 skipped, 566 bytes copied - (6, 0, 566) - >>> new_root = zarr.group(store2) - >>> new_root.tree() - / - └── foo - └── bar - └── baz (100,) int64 - >>> new_root['foo/bar/baz'][:] - array([ 0, 1, 2, ..., 97, 98, 99]) - >>> store2.close() # zip stores need to be closed - - Notes - ----- - Please note that this is an experimental feature. The behaviour of this - function is still evolving and the default behaviour and/or parameters may change - in future versions. 
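As an aside, a small sketch (not part of the diff) of the excludes/includes parameters described above: keys matching an exclude pattern are skipped unless an include pattern re-admits them. The store contents and patterns are illustrative.

import numpy as np
import zarr

source = zarr.MemoryStore()
root = zarr.group(store=source, overwrite=True)
root.attrs["owner"] = "demo"                                   # writes the root .zattrs key
root.create_dataset("foo", data=np.arange(10), chunks=(5,))

dest = zarr.MemoryStore()
# exclude every .zattrs key, but re-admit the one at the root via includes
zarr.copy_store(source, dest, excludes=[r"\.zattrs$"], includes=[r"^\.zattrs$"])
print(sorted(dest.keys()))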
- - """ - - # normalize paths - source_path = normalize_storage_path(source_path) - dest_path = normalize_storage_path(dest_path) - if source_path: - source_path = source_path + "/" - if dest_path: - dest_path = dest_path + "/" - - # normalize excludes and includes - if excludes is None: - excludes = [] - elif isinstance(excludes, str): - excludes = [excludes] - if includes is None: - includes = [] - elif isinstance(includes, str): - includes = [includes] - excludes = [re.compile(e, flags) for e in excludes] - includes = [re.compile(i, flags) for i in includes] - - # check if_exists parameter - valid_if_exists = ["raise", "replace", "skip"] - if if_exists not in valid_if_exists: - raise ValueError(f"if_exists must be one of {valid_if_exists!r}; found {if_exists!r}") - - # setup counting variables - n_copied = n_skipped = n_bytes_copied = 0 - - source_store_version = getattr(source, "_store_version", 2) - dest_store_version = getattr(dest, "_store_version", 2) - if source_store_version != dest_store_version: - raise ValueError("zarr stores must share the same protocol version") - - if source_store_version > 2: - nchar_root = len(meta_root) - # code below assumes len(meta_root) === len(data_root) - assert len(data_root) == nchar_root - - # setup logging - with _LogWriter(log) as log: - # iterate over source keys - for source_key in sorted(source.keys()): - # filter to keys under source path - if source_store_version == 2: - if not source_key.startswith(source_path): - continue - elif source_store_version == 3: - # skip 'meta/root/' or 'data/root/' at start of source_key - if not source_key[nchar_root:].startswith(source_path): - continue - - # process excludes and includes - exclude = False - for prog in excludes: - if prog.search(source_key): - exclude = True - break - if exclude: - for prog in includes: - if prog.search(source_key): - exclude = False - break - if exclude: - continue - - # map key to destination path - if source_store_version == 2: - key_suffix = source_key[len(source_path) :] - dest_key = dest_path + key_suffix - elif source_store_version == 3: - # nchar_root is length of 'meta/root/' or 'data/root/' - key_suffix = source_key[nchar_root + len(source_path) :] - dest_key = source_key[:nchar_root] + dest_path + key_suffix - - # create a descriptive label for this operation - descr = source_key - if dest_key != source_key: - descr = descr + " -> " + dest_key - - # decide what to do - do_copy = True - if if_exists != "replace": - if dest_key in dest: - if if_exists == "raise": - raise CopyError(f"key {dest_key!r} exists in destination") - elif if_exists == "skip": - do_copy = False - - # take action - if do_copy: - log(f"copy {descr}") - if not dry_run: - data = source[source_key] - n_bytes_copied += buffer_size(data) - dest[dest_key] = data - n_copied += 1 - else: - log(f"skip {descr}") - n_skipped += 1 - - # log a final message with a summary of what happened - _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) - - return n_copied, n_skipped, n_bytes_copied - - -def _check_dest_is_group(dest): - if not hasattr(dest, "create_dataset"): - raise ValueError(f"dest must be a group, got {dest!r}") - - -def copy( - source, - dest, - name=None, - shallow=False, - without_attrs=False, - log=None, - if_exists="raise", - dry_run=False, - **create_kws, -): - """Copy the `source` array or group into the `dest` group. - - Parameters - ---------- - source : group or array/dataset - A zarr group or array, or an h5py group or dataset. - dest : group - A zarr or h5py group. 
- name : str, optional - Name to copy the object to. - shallow : bool, optional - If True, only copy immediate children of `source`. - without_attrs : bool, optional - Do not copy user attributes. - log : callable, file path or file-like object, optional - If provided, will be used to log progress information. - if_exists : {'raise', 'replace', 'skip', 'skip_initialized'}, optional - How to handle arrays that already exist in the destination group. If - 'raise' then a CopyError is raised on the first array already present - in the destination group. If 'replace' then any array will be - replaced in the destination. If 'skip' then any existing arrays will - not be copied. If 'skip_initialized' then any existing arrays with - all chunks initialized will not be copied (not available when copying to - h5py). - dry_run : bool, optional - If True, don't actually copy anything, just log what would have - happened. - **create_kws - Passed through to the create_dataset method when copying an array/dataset. - - Returns - ------- - n_copied : int - Number of items copied. - n_skipped : int - Number of items skipped. - n_bytes_copied : int - Number of bytes of data that were actually copied. - - Examples - -------- - Here's an example of copying a group named 'foo' from an HDF5 file to a - Zarr group:: - - >>> import h5py - >>> import zarr - >>> import numpy as np - >>> source = h5py.File('data/example.h5', mode='w') - >>> foo = source.create_group('foo') - >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,)) - >>> zarr.tree(source) - / - ├── foo - │ └── bar - │ └── baz (100,) int64 - └── spam (100,) int64 - >>> dest = zarr.group() - >>> from sys import stdout - >>> zarr.copy(source['foo'], dest, log=stdout) - copy /foo - copy /foo/bar - copy /foo/bar/baz (100,) int64 - all done: 3 copied, 0 skipped, 800 bytes copied - (3, 0, 800) - >>> dest.tree() # N.B., no spam - / - └── foo - └── bar - └── baz (100,) int64 - >>> source.close() - - The ``if_exists`` parameter provides options for how to handle pre-existing data in - the destination. Here are some examples of these options, also using - ``dry_run=True`` to find out what would happen without actually copying anything:: - - >>> source = zarr.group() - >>> dest = zarr.group() - >>> baz = source.create_dataset('foo/bar/baz', data=np.arange(100)) - >>> spam = source.create_dataset('foo/spam', data=np.arange(1000)) - >>> existing_spam = dest.create_dataset('foo/spam', data=np.arange(1000)) - >>> from sys import stdout - >>> try: - ... zarr.copy(source['foo'], dest, log=stdout, dry_run=True) - ... except zarr.CopyError as e: - ... print(e) - ... - copy /foo - copy /foo/bar - copy /foo/bar/baz (100,) int64 - an object 'spam' already exists in destination '/foo' - >>> zarr.copy(source['foo'], dest, log=stdout, if_exists='replace', dry_run=True) - copy /foo - copy /foo/bar - copy /foo/bar/baz (100,) int64 - copy /foo/spam (1000,) int64 - dry run: 4 copied, 0 skipped - (4, 0, 0) - >>> zarr.copy(source['foo'], dest, log=stdout, if_exists='skip', dry_run=True) - copy /foo - copy /foo/bar - copy /foo/bar/baz (100,) int64 - skip /foo/spam (1000,) int64 - dry run: 3 copied, 1 skipped - (3, 1, 0) - - Notes - ----- - Please note that this is an experimental feature. The behaviour of this - function is still evolving and the default behaviour and/or parameters may change - in future versions. 
- - """ - - # value checks - _check_dest_is_group(dest) - - # setup logging - with _LogWriter(log) as log: - # do the copying - n_copied, n_skipped, n_bytes_copied = _copy( - log, - source, - dest, - name=name, - root=True, - shallow=shallow, - without_attrs=without_attrs, - if_exists=if_exists, - dry_run=dry_run, - **create_kws, - ) - - # log a final message with a summary of what happened - _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) - - return n_copied, n_skipped, n_bytes_copied - - -def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_run, **create_kws): - # N.B., if this is a dry run, dest may be None - - # setup counting variables - n_copied = n_skipped = n_bytes_copied = 0 - - # are we copying to/from h5py? - source_h5py = source.__module__.startswith("h5py.") - dest_h5py = dest is not None and dest.__module__.startswith("h5py.") - - # check if_exists parameter - valid_if_exists = ["raise", "replace", "skip", "skip_initialized"] - if if_exists not in valid_if_exists: - raise ValueError(f"if_exists must be one of {valid_if_exists!r}; found {if_exists!r}") - if dest_h5py and if_exists == "skip_initialized": - raise ValueError(f"{if_exists!r} can only be used when copying to zarr") - - # determine name to copy to - if name is None: - name = source.name.split("/")[-1] - if not name: - # this can happen if source is the root group - raise TypeError( - "source has no name, please provide the `name` " - "parameter to indicate a name to copy to" - ) - - if hasattr(source, "shape"): - # copy a dataset/array - - # check if already exists, decide what to do - do_copy = True - exists = dest is not None and name in dest - if exists: - if if_exists == "raise": - raise CopyError(f"an object {name!r} already exists in destination {dest.name!r}") - elif if_exists == "skip": - do_copy = False - elif if_exists == "skip_initialized": - ds = dest[name] - if ds.nchunks_initialized == ds.nchunks: - do_copy = False - - # take action - if do_copy: - # log a message about what we're going to do - log(f"copy {source.name} {source.shape} {source.dtype}") - - if not dry_run: - # clear the way - if exists: - del dest[name] - - # setup creation keyword arguments - kws = create_kws.copy() - - # setup chunks option, preserve by default - kws.setdefault("chunks", source.chunks) - - # setup compression options - if source_h5py: - if dest_h5py: - # h5py -> h5py; preserve compression options by default - kws.setdefault("compression", source.compression) - kws.setdefault("compression_opts", source.compression_opts) - kws.setdefault("shuffle", source.shuffle) - kws.setdefault("fletcher32", source.fletcher32) - kws.setdefault("fillvalue", source.fillvalue) - else: - # h5py -> zarr; use zarr default compression options - kws.setdefault("fill_value", source.fillvalue) - else: - if dest_h5py: - # zarr -> h5py; use some vaguely sensible defaults - kws.setdefault("chunks", True) - kws.setdefault("compression", "gzip") - kws.setdefault("compression_opts", 1) - kws.setdefault("shuffle", False) - kws.setdefault("fillvalue", source.fill_value) - else: - # zarr -> zarr; preserve compression options by default - kws.setdefault("compressor", source.compressor) - kws.setdefault("filters", source.filters) - kws.setdefault("order", source.order) - kws.setdefault("fill_value", source.fill_value) - - # create new dataset in destination - ds = dest.create_dataset(name, shape=source.shape, dtype=source.dtype, **kws) - - # copy data - N.B., go chunk by chunk to avoid loading - # everything 
into memory - shape = ds.shape - chunks = ds.chunks - chunk_offsets = [range(0, s, c) for s, c in zip(shape, chunks)] - for offset in itertools.product(*chunk_offsets): - sel = tuple(slice(o, min(s, o + c)) for o, s, c in zip(offset, shape, chunks)) - ds[sel] = source[sel] - n_bytes_copied += ds.size * ds.dtype.itemsize - - # copy attributes - if not without_attrs: - if dest_h5py and "filters" in source.attrs: - # No filters key in v3 metadata so it was stored in the - # attributes instead. We cannot copy this key to - # HDF5 attrs, though! - source_attrs = source.attrs.asdict().copy() - source_attrs.pop("filters", None) - else: - source_attrs = source.attrs - ds.attrs.update(source_attrs) - - n_copied += 1 - - else: - log(f"skip {source.name} {source.shape} {source.dtype}") - n_skipped += 1 - - elif root or not shallow: - # copy a group - - # check if an array is in the way - do_copy = True - exists_array = dest is not None and name in dest and hasattr(dest[name], "shape") - if exists_array: - if if_exists == "raise": - raise CopyError(f"an array {name!r} already exists in destination {dest.name!r}") - elif if_exists == "skip": - do_copy = False - - # take action - if do_copy: - # log action - log(f"copy {source.name}") - - if not dry_run: - # clear the way - if exists_array: - del dest[name] - - # require group in destination - grp = dest.require_group(name) - - # copy attributes - if not without_attrs: - grp.attrs.update(source.attrs) - - else: - # setup for dry run without creating any groups in the - # destination - if dest is not None: - grp = dest.get(name, None) - else: - grp = None - - # recurse - for k in source.keys(): - c, s, b = _copy( - log, - source[k], - grp, - name=k, - root=False, - shallow=shallow, - without_attrs=without_attrs, - if_exists=if_exists, - dry_run=dry_run, - **create_kws, - ) - n_copied += c - n_skipped += s - n_bytes_copied += b - - n_copied += 1 - - else: - log(f"skip {source.name}") - n_skipped += 1 - - return n_copied, n_skipped, n_bytes_copied - - -def copy_all( - source, - dest, - shallow=False, - without_attrs=False, - log=None, - if_exists="raise", - dry_run=False, - **create_kws, -): - """Copy all children of the `source` group into the `dest` group. - - Parameters - ---------- - source : group or array/dataset - A zarr group or array, or an h5py group or dataset. - dest : group - A zarr or h5py group. - shallow : bool, optional - If True, only copy immediate children of `source`. - without_attrs : bool, optional - Do not copy user attributes. - log : callable, file path or file-like object, optional - If provided, will be used to log progress information. - if_exists : {'raise', 'replace', 'skip', 'skip_initialized'}, optional - How to handle arrays that already exist in the destination group. If - 'raise' then a CopyError is raised on the first array already present - in the destination group. If 'replace' then any array will be - replaced in the destination. If 'skip' then any existing arrays will - not be copied. If 'skip_initialized' then any existing arrays with - all chunks initialized will not be copied (not available when copying to - h5py). - dry_run : bool, optional - If True, don't actually copy anything, just log what would have - happened. - **create_kws - Passed through to the create_dataset method when copying an - array/dataset. - - Returns - ------- - n_copied : int - Number of items copied. - n_skipped : int - Number of items skipped. - n_bytes_copied : int - Number of bytes of data that were actually copied. 
- - Examples - -------- - >>> import h5py - >>> import zarr - >>> import numpy as np - >>> source = h5py.File('data/example.h5', mode='w') - >>> foo = source.create_group('foo') - >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,)) - >>> zarr.tree(source) - / - ├── foo - │ └── bar - │ └── baz (100,) int64 - └── spam (100,) int64 - >>> dest = zarr.group() - >>> import sys - >>> zarr.copy_all(source, dest, log=sys.stdout) - copy /foo - copy /foo/bar - copy /foo/bar/baz (100,) int64 - copy /spam (100,) int64 - all done: 4 copied, 0 skipped, 1,600 bytes copied - (4, 0, 1600) - >>> dest.tree() - / - ├── foo - │ └── bar - │ └── baz (100,) int64 - └── spam (100,) int64 - >>> source.close() - - Notes - ----- - Please note that this is an experimental feature. The behaviour of this - function is still evolving and the default behaviour and/or parameters may change - in future versions. - - """ - - # value checks - _check_dest_is_group(dest) - - # setup counting variables - n_copied = n_skipped = n_bytes_copied = 0 - - zarr_version = getattr(source, "_version", 2) - - # setup logging - with _LogWriter(log) as log: - for k in source.keys(): - c, s, b = _copy( - log, - source[k], - dest, - name=k, - root=False, - shallow=shallow, - without_attrs=without_attrs, - if_exists=if_exists, - dry_run=dry_run, - **create_kws, - ) - n_copied += c - n_skipped += s - n_bytes_copied += b - if zarr_version == 2: - dest.attrs.update(**source.attrs) - - # log a final message with a summary of what happened - _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) - - return n_copied, n_skipped, n_bytes_copied - - -def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=""): - """ - Consolidate all metadata for groups and arrays within the given store - into a single resource and put it under the given key. - - This produces a single object in the backend store, containing all the - metadata read from all the zarr-related keys that can be found. After - metadata have been consolidated, use :func:`open_consolidated` to open - the root group in optimised, read-only mode, using the consolidated - metadata to reduce the number of read operations on the backend store. - - Note, that if the metadata in the store is changed after this - consolidation, then the metadata read by :func:`open_consolidated` - would be incorrect unless this function is called again. - - .. note:: This is an experimental feature. - - Parameters - ---------- - store : MutableMapping or string - Store or path to directory in file system or name of zip file. - metadata_key : str - Key to put the consolidated metadata under. - path : str or None - Path corresponding to the group that is being consolidated. Not required - for zarr v2 stores. - - Returns - ------- - g : :class:`zarr.hierarchy.Group` - Group instance, opened with the new consolidated metadata. 
-
-    See Also
-    --------
-    open_consolidated
-
-    """
-    store = normalize_store_arg(store, mode="w")
-
-    version = store._store_version
-
-    if version == 2:
-
-        def is_zarr_key(key):
-            return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs")
-
-    else:
-        assert_zarr_v3_api_available()
-
-        sfx = _get_metadata_suffix(store)  # type: ignore
-
-        def is_zarr_key(key):
-            return (
-                key.endswith(".array" + sfx) or key.endswith(".group" + sfx) or key == "zarr.json"
-            )
-
-        # cannot create a group without a path in v3
-        # so create /meta/root/consolidated group to store the metadata
-        if "consolidated" not in store:
-            _create_group(store, path="consolidated")
-        if not metadata_key.startswith("meta/root/"):
-            metadata_key = "meta/root/consolidated/" + metadata_key
-        # path = 'consolidated'
-
-    out = {
-        "zarr_consolidated_format": 1,
-        "metadata": {key: json_loads(store[key]) for key in store if is_zarr_key(key)},
-    }
-    store[metadata_key] = json_dumps(out)
-    return open_consolidated(store, metadata_key=metadata_key, path=path)
-
-
-def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **kwargs):
-    """Open group using metadata previously consolidated into a single key.
-
-    This is an optimised method for opening a Zarr group, where instead of
-    traversing the group/array hierarchy by accessing the metadata keys at
-    each level, a single key contains all of the metadata for everything.
-    This is especially useful for remote data sources, where the overhead of
-    accessing a key is large compared to the time to read data.
-
-    The group accessed must have already had its metadata consolidated into a
-    single key using the function :func:`consolidate_metadata`.
-
-    This optimised method only works in modes which do not change the
-    metadata, although the data may still be written/updated.
-
-    Parameters
-    ----------
-    store : MutableMapping or string
-        Store or path to directory in file system or name of zip file.
-    metadata_key : str
-        Key to read the consolidated metadata from. The default (.zmetadata)
-        corresponds to the default used by :func:`consolidate_metadata`.
-    mode : {'r', 'r+'}, optional
-        Persistence mode: 'r' means read only (must exist); 'r+' means
-        read/write (must exist) although only writes to data are allowed,
-        changes to metadata including creation of new arrays or groups
-        are not allowed.
-    **kwargs
-        Additional parameters are passed through to :func:`zarr.creation.open_array` or
-        :func:`zarr.hierarchy.open_group`.
-
-    Returns
-    -------
-    g : :class:`zarr.hierarchy.Group`
-        Group instance, opened with the consolidated metadata.
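(As a rough usage illustration of the two consolidated-metadata helpers deleted above, assuming the zarr 2.x API that this diff removes; the store and array names are illustrative only)::

    import numpy as np
    import zarr

    store = {}  # any MutableMapping works as a zarr v2 store
    root = zarr.group(store=store)
    root.create_dataset("foo/bar", data=np.arange(100), chunks=(10,))

    # gather every .zgroup/.zarray/.zattrs key into a single ".zmetadata" key
    zarr.consolidate_metadata(store)

    # later, open the hierarchy reading only that one consolidated key
    root = zarr.open_consolidated(store, mode="r+")
    print(root["foo/bar"][:5])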
- - See Also - -------- - consolidate_metadata - - """ - - # normalize parameters - zarr_version = kwargs.get("zarr_version") - store = normalize_store_arg( - store, storage_options=kwargs.get("storage_options"), mode=mode, zarr_version=zarr_version - ) - if mode not in {"r", "r+"}: - raise ValueError(f"invalid mode, expected either 'r' or 'r+'; found {mode!r}") - - path = kwargs.pop("path", None) - if store._store_version == 2: - ConsolidatedStoreClass = ConsolidatedMetadataStore - else: - assert_zarr_v3_api_available() - ConsolidatedStoreClass = ConsolidatedMetadataStoreV3 - # default is to store within 'consolidated' group on v3 - if not metadata_key.startswith("meta/root/"): - metadata_key = "meta/root/consolidated/" + metadata_key - - # setup metadata store - meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key) - - # pass through - chunk_store = kwargs.pop("chunk_store", None) or store - return open(store=meta_store, chunk_store=chunk_store, mode=mode, path=path, **kwargs) diff --git a/zarr/core.py b/zarr/core.py deleted file mode 100644 index d13da27bc6..0000000000 --- a/zarr/core.py +++ /dev/null @@ -1,2958 +0,0 @@ -import binascii -import hashlib -import itertools -import math -import operator -import re -from functools import reduce -from typing import Any - -import numpy as np -from numcodecs.compat import ensure_bytes - -from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available -from zarr.attrs import Attributes -from zarr.codecs import AsType, get_codec -from zarr.context import Context -from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError -from zarr.indexing import ( - BasicIndexer, - CoordinateIndexer, - MaskIndexer, - OIndex, - OrthogonalIndexer, - VIndex, - BlockIndex, - BlockIndexer, - PartialChunkIterator, - check_fields, - check_no_multi_fields, - ensure_tuple, - err_too_many_indices, - is_contiguous_selection, - is_pure_fancy_indexing, - is_pure_orthogonal_indexing, - is_scalar, - pop_fields, -) -from zarr.storage import ( - _get_hierarchy_metadata, - _prefix_to_array_key, - KVStore, - getsize, - listdir, - normalize_store_arg, -) -from zarr.util import ( - ConstantMap, - all_equal, - InfoReporter, - check_array_shape, - human_readable_size, - is_total_slice, - nolock, - normalize_chunks, - normalize_resize_args, - normalize_shape, - normalize_storage_path, - PartialReadBuffer, - UncompressedPartialReadBufferV3, - ensure_ndarray_like, -) - -__all__ = ["Array"] - - -# noinspection PyUnresolvedReferences -class Array: - """Instantiate an array from an initialized store. - - Parameters - ---------- - store : MutableMapping - Array store, already initialized. - path : string, optional - Storage path. - read_only : bool, optional - True if array should be protected against modification. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. - synchronizer : object, optional - Array synchronizer. - cache_metadata : bool, optional - If True (default), array configuration metadata will be cached for the - lifetime of the object. If False, array metadata will be reloaded - prior to all data access and modification operations (may incur - overhead depending on storage and data access pattern). - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. 
- partial_decompress : bool, optional - If True and while the chunk_store is a FSStore and the compression used - is Blosc, when getting data from the array chunks will be partially - read and decompressed when possible. - - .. versionadded:: 2.7 - - write_empty_chunks : bool, optional - If True, all chunks will be stored regardless of their contents. If - False (default), each chunk is compared to the array's fill value prior - to storing. If a chunk is uniformly equal to the fill value, then that - chunk is not be stored, and the store entry for that chunk's key is - deleted. This setting enables sparser storage, as only chunks with - non-fill-value data are stored, at the expense of overhead associated - with checking the data of each chunk. - - .. versionadded:: 2.11 - - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. versionadded:: 2.13 - """ - - def __init__( - self, - store: Any, # BaseStore not strictly required due to normalize_store_arg - path=None, - read_only=False, - chunk_store=None, - synchronizer=None, - cache_metadata=True, - cache_attrs=True, - partial_decompress=False, - write_empty_chunks=True, - zarr_version=None, - meta_array=None, - ): - # N.B., expect at this point store is fully initialized with all - # configuration metadata fully specified and normalized - - store = normalize_store_arg(store, zarr_version=zarr_version) - if zarr_version is None: - zarr_version = store._store_version - - if zarr_version != 2: - assert_zarr_v3_api_available() - - if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, zarr_version=zarr_version) - - self._store = store - self._chunk_store = chunk_store - self._transformed_chunk_store = None - self._path = normalize_storage_path(path) - if self._path: - self._key_prefix = self._path + "/" - else: - self._key_prefix = "" - self._read_only = bool(read_only) - self._synchronizer = synchronizer - self._cache_metadata = cache_metadata - self._is_view = False - self._partial_decompress = partial_decompress - self._write_empty_chunks = write_empty_chunks - if meta_array is not None: - self._meta_array = np.empty_like(meta_array, shape=()) - else: - self._meta_array = np.empty(()) - self._version = zarr_version - if self._version == 3: - self._data_key_prefix = "data/root/" + self._key_prefix - self._data_path = "data/root/" + self._path - self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) - self._metadata_key_suffix = self._hierarchy_metadata["metadata_key_suffix"] - - # initialize metadata - self._load_metadata() - - # initialize attributes - akey = _prefix_to_attrs_key(self._store, self._key_prefix) - self._attrs = Attributes( - store, - key=akey, - read_only=read_only, - synchronizer=synchronizer, - cache=cache_attrs, - cached_dict=self._meta["attributes"] if self._version == 3 else None, - ) - - # initialize info reporter - - # initialize indexing helpers - self._oindex = OIndex(self) - self._vindex = VIndex(self) - self._blocks = BlockIndex(self) - - def _load_metadata(self): - """(Re)load metadata from store.""" - if self._synchronizer is None: - self._load_metadata_nosync() - else: - mkey = _prefix_to_array_key(self._store, self._key_prefix) - with self._synchronizer[mkey]: - self._load_metadata_nosync() - - def _load_metadata_nosync(self): - try: - mkey = _prefix_to_array_key(self._store, self._key_prefix) - meta_bytes = self._store[mkey] - except KeyError as e: - raise 
ArrayNotFoundError(self._path) from e - else: - # decode and store metadata as instance members - meta = self._store._metadata_class.decode_array_metadata(meta_bytes) - self._meta = meta - self._shape = meta["shape"] - self._fill_value = meta["fill_value"] - dimension_separator = meta.get("dimension_separator", None) - if self._version == 2: - self._chunks = meta["chunks"] - self._dtype = meta["dtype"] - self._order = meta["order"] - if dimension_separator is None: - try: - dimension_separator = self._store._dimension_separator - except (AttributeError, KeyError): - pass - - # Fallback for any stores which do not choose a default - if dimension_separator is None: - dimension_separator = "." - else: - self._chunks = meta["chunk_grid"]["chunk_shape"] - self._dtype = meta["data_type"] - self._order = meta["chunk_memory_layout"] - chunk_separator = meta["chunk_grid"]["separator"] - if dimension_separator is None: - dimension_separator = meta.get("dimension_separator", chunk_separator) - - self._dimension_separator = dimension_separator - - # setup compressor - compressor = meta.get("compressor", None) - if compressor is None: - self._compressor = None - elif self._version == 2: - self._compressor = get_codec(compressor) - else: - self._compressor = compressor - - # setup filters - if self._version == 2: - filters = meta.get("filters", []) - else: - # TODO: storing filters under attributes for now since the v3 - # array metadata does not have a 'filters' attribute. - filters = meta["attributes"].get("filters", []) - if filters: - filters = [get_codec(config) for config in filters] - self._filters = filters - - if self._version == 3: - storage_transformers = meta.get("storage_transformers", []) - if storage_transformers: - transformed_store = self._chunk_store or self._store - for storage_transformer in storage_transformers[::-1]: - transformed_store = storage_transformer._copy_for_array( - self, transformed_store - ) - self._transformed_chunk_store = transformed_store - - def _refresh_metadata(self): - if not self._cache_metadata: - self._load_metadata() - - def _refresh_metadata_nosync(self): - if not self._cache_metadata and not self._is_view: - self._load_metadata_nosync() - - def _flush_metadata_nosync(self): - if self._is_view: - raise PermissionError("operation not permitted for views") - - if self._compressor: - compressor_config = self._compressor.get_config() - else: - compressor_config = None - if self._filters: - filters_config = [f.get_config() for f in self._filters] - else: - filters_config = None - _compressor = compressor_config if self._version == 2 else self._compressor - meta = dict( - shape=self._shape, - compressor=_compressor, - fill_value=self._fill_value, - filters=filters_config, - ) - if getattr(self._store, "_store_version", 2) == 2: - meta.update( - dict( - chunks=self._chunks, - dtype=self._dtype, - order=self._order, - dimension_separator=self._dimension_separator, - ) - ) - else: - meta.update( - dict( - chunk_grid=dict( - type="regular", - chunk_shape=self._chunks, - separator=self._dimension_separator, - ), - data_type=self._dtype, - chunk_memory_layout=self._order, - attributes=self.attrs.asdict(), - ) - ) - mkey = _prefix_to_array_key(self._store, self._key_prefix) - self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) - - @property - def store(self): - """A MutableMapping providing the underlying storage for the array.""" - return self._store - - @property - def path(self): - """Storage path.""" - return self._path - - @property - def 
name(self): - """Array name following h5py convention.""" - if self.path: - # follow h5py convention: add leading slash - name = self.path - if name[0] != "/": - name = "/" + name - return name - return None - - @property - def basename(self): - """Final component of name.""" - if self.name is not None: - return self.name.split("/")[-1] - return None - - @property - def read_only(self): - """A boolean, True if modification operations are not permitted.""" - return self._read_only - - @read_only.setter - def read_only(self, value): - self._read_only = bool(value) - - @property - def chunk_store(self): - """A MutableMapping providing the underlying storage for array chunks.""" - if self._transformed_chunk_store is not None: - return self._transformed_chunk_store - elif self._chunk_store is not None: - return self._chunk_store - else: - return self._store - - @property - def shape(self): - """A tuple of integers describing the length of each dimension of - the array.""" - # N.B., shape may change if array is resized, hence need to refresh - # metadata - self._refresh_metadata() - return self._shape - - @shape.setter - def shape(self, value): - self.resize(value) - - @property - def chunks(self): - """A tuple of integers describing the length of each dimension of a - chunk of the array.""" - return self._chunks - - @property - def dtype(self): - """The NumPy data type.""" - return self._dtype - - @property - def compressor(self): - """Primary compression codec.""" - return self._compressor - - @property - def fill_value(self): - """A value used for uninitialized portions of the array.""" - return self._fill_value - - @fill_value.setter - def fill_value(self, new): - self._fill_value = new - self._flush_metadata_nosync() - - @property - def order(self): - """A string indicating the order in which bytes are arranged within - chunks of the array.""" - return self._order - - @property - def filters(self): - """One or more codecs used to transform data prior to compression.""" - return self._filters - - @property - def synchronizer(self): - """Object used to synchronize write access to the array.""" - return self._synchronizer - - @property - def attrs(self): - """A MutableMapping containing user-defined attributes. Note that - attribute values must be JSON serializable.""" - return self._attrs - - @property - def ndim(self): - """Number of dimensions.""" - return len(self._shape) - - @property - def _size(self): - return reduce(operator.mul, self._shape, 1) - - @property - def size(self): - """The total number of elements in the array.""" - # N.B., this property depends on shape, and shape may change if array - # is resized, hence need to refresh metadata - self._refresh_metadata() - return self._size - - @property - def itemsize(self): - """The size in bytes of each item in the array.""" - return self.dtype.itemsize - - @property - def _nbytes(self): - return self._size * self.itemsize - - @property - def nbytes(self): - """The total number of bytes that would be required to store the - array without compression.""" - # N.B., this property depends on shape, and shape may change if array - # is resized, hence need to refresh metadata - self._refresh_metadata() - return self._nbytes - - @property - def nbytes_stored(self): - """The total number of stored bytes of data for the array. 
This - includes storage required for configuration metadata and user - attributes.""" - m = getsize(self._store, self._path) - if self._chunk_store is None: - return m - else: - n = getsize(self._chunk_store, self._path) - if m < 0 or n < 0: - return -1 - else: - return m + n - - @property - def _cdata_shape(self): - if self._shape == (): - return (1,) - else: - return tuple(math.ceil(s / c) for s, c in zip(self._shape, self._chunks)) - - @property - def cdata_shape(self): - """A tuple of integers describing the number of chunks along each - dimension of the array.""" - self._refresh_metadata() - return self._cdata_shape - - @property - def _nchunks(self): - return reduce(operator.mul, self._cdata_shape, 1) - - @property - def nchunks(self): - """Total number of chunks.""" - self._refresh_metadata() - return self._nchunks - - @property - def nchunks_initialized(self): - """The number of chunks that have been initialized with some data.""" - - # count chunk keys - if self._version == 3: - # # key pattern for chunk keys - # prog = re.compile(r'\.'.join([r'c\d+'] * min(1, self.ndim))) - # # get chunk keys, excluding the prefix - # members = self.chunk_store.list_prefix(self._data_path) - # members = [k.split(self._data_key_prefix)[1] for k in members] - # # count the chunk keys - # return sum(1 for k in members if prog.match(k)) - - # key pattern for chunk keys - prog = re.compile(self._data_key_prefix + r"c\d+") # TODO: ndim == 0 case? - # get chunk keys, excluding the prefix - members = self.chunk_store.list_prefix(self._data_path) - # count the chunk keys - return sum(1 for k in members if prog.match(k)) - else: - # key pattern for chunk keys - prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) - - # count chunk keys - return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) - - # backwards compatibility - initialized = nchunks_initialized - - @property - def is_view(self): - """A boolean, True if this array is a view on another array.""" - return self._is_view - - @property - def oindex(self): - """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and - :func:`set_orthogonal_selection` for documentation and examples.""" - return self._oindex - - @property - def vindex(self): - """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, - :func:`set_coordinate_selection`, :func:`get_mask_selection` and - :func:`set_mask_selection` for documentation and examples.""" - return self._vindex - - @property - def blocks(self): - """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and - :func:`set_block_selection` for documentation and examples.""" - return self._blocks - - @property - def write_empty_chunks(self) -> bool: - """A Boolean, True if chunks composed of the array's fill value - will be stored. If False, such chunks will not be stored. - """ - return self._write_empty_chunks - - @property - def meta_array(self): - """An array-like instance to use for determining arrays to create and return - to users. 
- """ - return self._meta_array - - def __eq__(self, other): - return ( - isinstance(other, Array) - and self.store == other.store - and self.read_only == other.read_only - and self.path == other.path - and not self._is_view - # N.B., no need to compare other properties, should be covered by - # store comparison - ) - - def __array__(self, *args, **kwargs): - return np.array(self[...], *args, **kwargs) - - def islice(self, start=None, end=None): - """ - Yield a generator for iterating over the entire or parts of the - array. Uses a cache so chunks only have to be decompressed once. - - Parameters - ---------- - start : int, optional - Start index for the generator to start at. Defaults to 0. - end : int, optional - End index for the generator to stop at. Defaults to self.shape[0]. - - Yields - ------ - out : generator - A generator that can be used to iterate over the requested region - the array. - - Examples - -------- - Setup a 1-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100)) - - Iterate over part of the array: - >>> for value in z.islice(25, 30): value; - np.int64(25) - np.int64(26) - np.int64(27) - np.int64(28) - np.int64(29) - """ - - if len(self.shape) == 0: - # Same error as numpy - raise TypeError("iteration over a 0-d array") - if start is None: - start = 0 - if end is None or end > self.shape[0]: - end = self.shape[0] - - if not isinstance(start, int) or start < 0: - raise ValueError("start must be a nonnegative integer") - - if not isinstance(end, int) or end < 0: - raise ValueError("end must be a nonnegative integer") - - # Avoid repeatedly decompressing chunks by iterating over the chunks - # in the first dimension. - chunk_size = self.chunks[0] - chunk = None - for j in range(start, end): - if j % chunk_size == 0: - chunk = self[j : j + chunk_size] - # init chunk if we start offset of chunk borders - elif chunk is None: - chunk_start = j - j % chunk_size - chunk_end = chunk_start + chunk_size - chunk = self[chunk_start:chunk_end] - yield chunk[j % chunk_size] - - def __iter__(self): - return self.islice() - - def __len__(self): - if self.shape: - return self.shape[0] - else: - # 0-dimensional array, same error message as numpy - raise TypeError("len() of unsized object") - - def __getitem__(self, selection): - """Retrieve data for an item or region of the array. - - Parameters - ---------- - selection : tuple - An integer index or slice or tuple of int/slice objects specifying the - requested item or region for each dimension of the array. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested region. - - Examples - -------- - Setup a 1-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100)) - - Retrieve a single item:: - - >>> z[5] - np.int64(5) - - Retrieve a region via slicing:: - - >>> z[:5] - array([0, 1, 2, 3, 4]) - >>> z[-5:] - array([95, 96, 97, 98, 99]) - >>> z[5:10] - array([5, 6, 7, 8, 9]) - >>> z[5:10:2] - array([5, 7, 9]) - >>> z[::2] - array([ 0, 2, 4, ..., 94, 96, 98]) - - Load the entire array into memory:: - - >>> z[...] 
- array([ 0, 1, 2, ..., 97, 98, 99]) - - Setup a 2-dimensional array:: - - >>> z = zarr.array(np.arange(100).reshape(10, 10)) - - Retrieve an item:: - - >>> z[2, 2] - np.int64(22) - - Retrieve a region via slicing:: - - >>> z[1:3, 1:3] - array([[11, 12], - [21, 22]]) - >>> z[1:3, :] - array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) - >>> z[:, 1:3] - array([[ 1, 2], - [11, 12], - [21, 22], - [31, 32], - [41, 42], - [51, 52], - [61, 62], - [71, 72], - [81, 82], - [91, 92]]) - >>> z[0:5:2, 0:5:2] - array([[ 0, 2, 4], - [20, 22, 24], - [40, 42, 44]]) - >>> z[::2, ::2] - array([[ 0, 2, 4, 6, 8], - [20, 22, 24, 26, 28], - [40, 42, 44, 46, 48], - [60, 62, 64, 66, 68], - [80, 82, 84, 86, 88]]) - - Load the entire array into memory:: - - >>> z[...] - array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], - [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], - [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], - [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], - [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], - [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) - - For arrays with a structured dtype, specific fields can be retrieved, e.g.:: - - >>> a = np.array([(b'aaa', 1, 4.2), - ... (b'bbb', 2, 8.4), - ... (b'ccc', 3, 12.6)], - ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) - >>> z['foo'] - array([b'aaa', b'bbb', b'ccc'], - dtype='|S3') - - Notes - ----- - Slices with step > 1 are supported, but slices with negative step are not. - - Currently the implementation for __getitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a - broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. - - Effectively, this means that the following indexing modes are supported: - - - integer indexing - - slice indexing - - mixed slice and integer indexing - - boolean indexing - - fancy indexing (vectorized list of integers) - - For specific indexing options including outer indexing, see the - methods listed under See Also. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __setitem__ - - """ - fields, pure_selection = pop_fields(selection) - if is_pure_fancy_indexing(pure_selection, self.ndim): - result = self.vindex[selection] - elif is_pure_orthogonal_indexing(pure_selection, self.ndim): - result = self.get_orthogonal_selection(pure_selection, fields=fields) - else: - result = self.get_basic_selection(pure_selection, fields=fields) - return result - - def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): - """Retrieve data for an item or region of the array. - - Parameters - ---------- - selection : tuple - A tuple specifying the requested item or region for each dimension of the - array. May be any combination of int and/or slice for multidimensional arrays. - out : ndarray, optional - If given, load the selected data directly into this array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to - extract data for. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested region. 
- - Examples - -------- - Setup a 1-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100)) - - Retrieve a single item:: - - >>> z.get_basic_selection(5) - np.int64(5) - - Retrieve a region via slicing:: - - >>> z.get_basic_selection(slice(5)) - array([0, 1, 2, 3, 4]) - >>> z.get_basic_selection(slice(-5, None)) - array([95, 96, 97, 98, 99]) - >>> z.get_basic_selection(slice(5, 10)) - array([5, 6, 7, 8, 9]) - >>> z.get_basic_selection(slice(5, 10, 2)) - array([5, 7, 9]) - >>> z.get_basic_selection(slice(None, None, 2)) - array([ 0, 2, 4, ..., 94, 96, 98]) - - Setup a 2-dimensional array:: - - >>> z = zarr.array(np.arange(100).reshape(10, 10)) - - Retrieve an item:: - - >>> z.get_basic_selection((2, 2)) - np.int64(22) - - Retrieve a region via slicing:: - - >>> z.get_basic_selection((slice(1, 3), slice(1, 3))) - array([[11, 12], - [21, 22]]) - >>> z.get_basic_selection((slice(1, 3), slice(None))) - array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) - >>> z.get_basic_selection((slice(None), slice(1, 3))) - array([[ 1, 2], - [11, 12], - [21, 22], - [31, 32], - [41, 42], - [51, 52], - [61, 62], - [71, 72], - [81, 82], - [91, 92]]) - >>> z.get_basic_selection((slice(0, 5, 2), slice(0, 5, 2))) - array([[ 0, 2, 4], - [20, 22, 24], - [40, 42, 44]]) - >>> z.get_basic_selection((slice(None, None, 2), slice(None, None, 2))) - array([[ 0, 2, 4, 6, 8], - [20, 22, 24, 26, 28], - [40, 42, 44, 46, 48], - [60, 62, 64, 66, 68], - [80, 82, 84, 86, 88]]) - - For arrays with a structured dtype, specific fields can be retrieved, e.g.:: - - >>> a = np.array([(b'aaa', 1, 4.2), - ... (b'bbb', 2, 8.4), - ... (b'ccc', 3, 12.6)], - ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) - >>> z.get_basic_selection(slice(2), fields='foo') - array([b'aaa', b'bbb'], - dtype='|S3') - - Notes - ----- - Slices with step > 1 are supported, but slices with negative step are not. - - Currently this method provides the implementation for accessing data via the - square bracket notation (__getitem__). See :func:`__getitem__` for examples - using the alternative notation. 
- - See Also - -------- - set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # refresh metadata - if not self._cache_metadata: - self._load_metadata() - - # check args - check_fields(fields, self._dtype) - - # handle zero-dimensional arrays - if self._shape == (): - return self._get_basic_selection_zd(selection=selection, out=out, fields=fields) - else: - return self._get_basic_selection_nd(selection=selection, out=out, fields=fields) - - def _get_basic_selection_zd(self, selection, out=None, fields=None): - # special case basic selection for zero-dimensional array - - # check selection is valid - selection = ensure_tuple(selection) - if selection not in ((), (Ellipsis,)): - err_too_many_indices(selection, ()) - - try: - # obtain encoded data for chunk - ckey = self._chunk_key((0,)) - cdata = self.chunk_store[ckey] - - except KeyError: - # chunk not initialized - chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) - if self._fill_value is not None: - chunk.fill(self._fill_value) - - else: - chunk = self._decode_chunk(cdata) - - # handle fields - if fields: - chunk = chunk[fields] - - # handle selection of the scalar value via empty tuple - if out is None: - out = chunk[selection] - else: - out[selection] = chunk[selection] - - return out - - def _get_basic_selection_nd(self, selection, out=None, fields=None): - # implementation of basic selection for array with at least one dimension - - # setup indexer - indexer = BasicIndexer(selection, self) - - return self._get_selection(indexer=indexer, out=out, fields=fields) - - def get_orthogonal_selection(self, selection, out=None, fields=None): - """Retrieve data by making a selection for each dimension of the array. For - example, if an array has 2 dimensions, allows selecting specific rows and/or - columns. The selection for each dimension can be either an integer (indexing a - single item), a slice, an array of integers, or a Boolean array where True - values indicate a selection. - - Parameters - ---------- - selection : tuple - A selection for each dimension of the array. May be any combination of int, - slice, integer array or Boolean array. - out : ndarray, optional - If given, load the selected data directly into this array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to - extract data for. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested selection. 
- - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) - - Retrieve rows and columns via any combination of int, slice, integer array and/or - Boolean array:: - - >>> z.get_orthogonal_selection(([1, 4], slice(None))) - array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) - >>> z.get_orthogonal_selection((slice(None), [1, 4])) - array([[ 1, 4], - [11, 14], - [21, 24], - [31, 34], - [41, 44], - [51, 54], - [61, 64], - [71, 74], - [81, 84], - [91, 94]]) - >>> z.get_orthogonal_selection(([1, 4], [1, 4])) - array([[11, 14], - [41, 44]]) - >>> sel = np.zeros(z.shape[0], dtype=bool) - >>> sel[1] = True - >>> sel[4] = True - >>> z.get_orthogonal_selection((sel, sel)) - array([[11, 14], - [41, 44]]) - - For convenience, the orthogonal selection functionality is also available via the - `oindex` property, e.g.:: - - >>> z.oindex[[1, 4], :] - array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) - >>> z.oindex[:, [1, 4]] - array([[ 1, 4], - [11, 14], - [21, 24], - [31, 34], - [41, 44], - [51, 54], - [61, 64], - [71, 74], - [81, 84], - [91, 94]]) - >>> z.oindex[[1, 4], [1, 4]] - array([[11, 14], - [41, 44]]) - >>> sel = np.zeros(z.shape[0], dtype=bool) - >>> sel[1] = True - >>> sel[4] = True - >>> z.oindex[sel, sel] - array([[11, 14], - [41, 44]]) - - Notes - ----- - Orthogonal indexing is also known as outer indexing. - - Slices with step > 1 are supported, but slices with negative step are not. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # refresh metadata - if not self._cache_metadata: - self._load_metadata() - - # check args - check_fields(fields, self._dtype) - - # setup indexer - indexer = OrthogonalIndexer(selection, self) - - return self._get_selection(indexer=indexer, out=out, fields=fields) - - def get_coordinate_selection(self, selection, out=None, fields=None): - """Retrieve a selection of individual items, by providing the indices - (coordinates) for each selected item. - - Parameters - ---------- - selection : tuple - An integer (coordinate) array for each dimension of the array. - out : ndarray, optional - If given, load the selected data directly into this array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to - extract data for. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested selection. - - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) - - Retrieve items by specifying their coordinates:: - - >>> z.get_coordinate_selection(([1, 4], [1, 4])) - array([11, 44]) - - For convenience, the coordinate selection functionality is also available via the - `vindex` property, e.g.:: - - >>> z.vindex[[1, 4], [1, 4]] - array([11, 44]) - - Notes - ----- - Coordinate indexing is also known as point selection, and is a form of vectorized - or inner indexing. - - Slices are not supported. Coordinate arrays must be provided for all dimensions - of the array. 
- - Coordinate arrays may be multidimensional, in which case the output array will - also be multidimensional. Coordinate arrays are broadcast against each other - before being applied. The shape of the output will be the same as the shape of - each coordinate array after broadcasting. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # refresh metadata - if not self._cache_metadata: - self._load_metadata() - - # check args - check_fields(fields, self._dtype) - - # setup indexer - indexer = CoordinateIndexer(selection, self) - - # handle output - need to flatten - if out is not None: - out = out.reshape(-1) - - out = self._get_selection(indexer=indexer, out=out, fields=fields) - - # restore shape - out = out.reshape(indexer.sel_shape) - - return out - - def get_block_selection(self, selection, out=None, fields=None): - """Retrieve a selection of individual chunk blocks, by providing the indices - (coordinates) for each chunk block. - - Parameters - ---------- - selection : tuple - An integer (coordinate) or slice for each dimension of the array. - out : ndarray, optional - If given, load the selected data directly into this array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to - extract data for. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested selection. - - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10), chunks=(3, 3)) - - Retrieve items by specifying their block coordinates:: - - >>> z.get_block_selection((1, slice(None))) - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - - Which is equivalent to:: - - >>> z[3:6, :] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - - For convenience, the block selection functionality is also available via the - `blocks` property, e.g.:: - - >>> z.blocks[1] - array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], - [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) - - Notes - ----- - Block indexing is a convenience indexing method to work on individual chunks - with chunk index slicing. It has the same concept as Dask's `Array.blocks` - indexing. - - Slices are supported. However, only with a step size of one. - - Block index arrays may be multidimensional to index multidimensional arrays. 
- For example:: - - >>> z.blocks[0, 1:3] - array([[ 3, 4, 5, 6, 7, 8], - [13, 14, 15, 16, 17, 18], - [23, 24, 25, 26, 27, 28]]) - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - if not self._cache_metadata: - self._load_metadata() - - # check args - check_fields(fields, self._dtype) - - # setup indexer - indexer = BlockIndexer(selection, self) - - return self._get_selection(indexer=indexer, out=out, fields=fields) - - def get_mask_selection(self, selection, out=None, fields=None): - """Retrieve a selection of individual items, by providing a Boolean array of the - same shape as the array against which the selection is being made, where True - values indicate a selected item. - - Parameters - ---------- - selection : ndarray, bool - A Boolean array of the same shape as the array against which the selection is - being made. - out : ndarray, optional - If given, load the selected data directly into this array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to - extract data for. - - Returns - ------- - out : ndarray - A NumPy array containing the data for the requested selection. - - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) - - Retrieve items by specifying a mask:: - - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[1, 1] = True - >>> sel[4, 4] = True - >>> z.get_mask_selection(sel) - array([11, 44]) - - For convenience, the mask selection functionality is also available via the - `vindex` property, e.g.:: - - >>> z.vindex[sel] - array([11, 44]) - - Notes - ----- - Mask indexing is a form of vectorized or inner indexing, and is equivalent to - coordinate indexing. Internally the mask array is converted to coordinate - arrays by calling `np.nonzero`. - - See Also - -------- - get_basic_selection, set_basic_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - """ - - # refresh metadata - if not self._cache_metadata: - self._load_metadata() - - # check args - check_fields(fields, self._dtype) - - # setup indexer - indexer = MaskIndexer(selection, self) - - return self._get_selection(indexer=indexer, out=out, fields=fields) - - def _get_selection(self, indexer, out=None, fields=None): - # We iterate over all chunks which overlap the selection and thus contain data - # that needs to be extracted. Each chunk is processed in turn, extracting the - # necessary data and storing into the correct location in the output array. - - # N.B., it is an important optimisation that we only visit chunks which overlap - # the selection. This minimises the number of iterations in the main for loop. 
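(A brief editorial aside making the chunk arithmetic behind the comment above concrete; the shapes and numbers are illustrative only)::

    # e.g. for shape (100,) and chunks (10,), the selection slice(25, 41)
    # touches only chunk indices 25 // 10 through 40 // 10, i.e. chunks 2, 3
    # and 4, so at most three chunks are fetched and decoded for this read.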
- - # check fields are sensible - out_dtype = check_fields(fields, self._dtype) - - # determine output shape - out_shape = indexer.shape - - # setup output array - if out is None: - out = np.empty_like( - self._meta_array, shape=out_shape, dtype=out_dtype, order=self._order - ) - else: - check_array_shape("out", out, out_shape) - - # iterate over chunks - - if math.prod(out_shape) > 0: - # allow storage to get multiple items at once - lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) - self._chunk_getitems( - lchunk_coords, - lchunk_selection, - out, - lout_selection, - drop_axes=indexer.drop_axes, - fields=fields, - ) - if out.shape: - return out - else: - return out[()] - - def __setitem__(self, selection, value): - """Modify data for an item or region of the array. - - Parameters - ---------- - selection : tuple - An integer index or slice or tuple of int/slice specifying the requested - region for each dimension of the array. - value : scalar or array-like - Value to be stored into the array. - - Examples - -------- - Setup a 1-dimensional array:: - - >>> import zarr - >>> z = zarr.zeros(100, dtype=int) - - Set all array elements to the same scalar value:: - - >>> z[...] = 42 - >>> z[...] - array([42, 42, 42, ..., 42, 42, 42]) - - Set a portion of the array:: - - >>> z[:10] = np.arange(10) - >>> z[-10:] = np.arange(10)[::-1] - >>> z[...] - array([ 0, 1, 2, ..., 2, 1, 0]) - - Setup a 2-dimensional array:: - - >>> z = zarr.zeros((5, 5), dtype=int) - - Set all array elements to the same scalar value:: - - >>> z[...] = 42 - - Set a portion of the array:: - - >>> z[0, :] = np.arange(z.shape[1]) - >>> z[:, 0] = np.arange(z.shape[0]) - >>> z[...] - array([[ 0, 1, 2, 3, 4], - [ 1, 42, 42, 42, 42], - [ 2, 42, 42, 42, 42], - [ 3, 42, 42, 42, 42], - [ 4, 42, 42, 42, 42]]) - - For arrays with a structured dtype, specific fields can be modified, e.g.:: - - >>> a = np.array([(b'aaa', 1, 4.2), - ... (b'bbb', 2, 8.4), - ... (b'ccc', 3, 12.6)], - ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) - >>> z['foo'] = b'zzz' - >>> z[...] - array([(b'zzz', 1, 4.2), (b'zzz', 2, 8.4), (b'zzz', 3, 12.6)], - dtype=[('foo', 'S3'), ('bar', ' 1 are supported, but slices with negative step are not. - - Currently the implementation for __setitem__ is provided by - :func:`vindex` if the indexing is pure fancy indexing (ie a - broadcast-compatible tuple of integer array indices), or by - :func:`set_basic_selection` otherwise. - - Effectively, this means that the following indexing modes are supported: - - - integer indexing - - slice indexing - - mixed slice and integer indexing - - boolean indexing - - fancy indexing (vectorized list of integers) - - For specific indexing options including outer indexing, see the - methods listed under See Also. 
- - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__ - - """ - fields, pure_selection = pop_fields(selection) - if is_pure_fancy_indexing(pure_selection, self.ndim): - self.vindex[selection] = value - elif is_pure_orthogonal_indexing(pure_selection, self.ndim): - self.set_orthogonal_selection(pure_selection, value, fields=fields) - else: - self.set_basic_selection(pure_selection, value, fields=fields) - - def set_basic_selection(self, selection, value, fields=None): - """Modify data for an item or region of the array. - - Parameters - ---------- - selection : tuple - An integer index or slice or tuple of int/slice specifying the requested - region for each dimension of the array. - value : scalar or array-like - Value to be stored into the array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to set - data for. - - Examples - -------- - Setup a 1-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.zeros(100, dtype=int) - - Set all array elements to the same scalar value:: - - >>> z.set_basic_selection(..., 42) - >>> z[...] - array([42, 42, 42, ..., 42, 42, 42]) - - Set a portion of the array:: - - >>> z.set_basic_selection(slice(10), np.arange(10)) - >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1]) - >>> z[...] - array([ 0, 1, 2, ..., 2, 1, 0]) - - Setup a 2-dimensional array:: - - >>> z = zarr.zeros((5, 5), dtype=int) - - Set all array elements to the same scalar value:: - - >>> z.set_basic_selection(..., 42) - - Set a portion of the array:: - - >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1])) - >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0])) - >>> z[...] - array([[ 0, 1, 2, 3, 4], - [ 1, 42, 42, 42, 42], - [ 2, 42, 42, 42, 42], - [ 3, 42, 42, 42, 42], - [ 4, 42, 42, 42, 42]]) - - For arrays with a structured dtype, the `fields` parameter can be used to set - data for a specific field, e.g.:: - - >>> a = np.array([(b'aaa', 1, 4.2), - ... (b'bbb', 2, 8.4), - ... (b'ccc', 3, 12.6)], - ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) - >>> z.set_basic_selection(slice(0, 2), b'zzz', fields='foo') - >>> z[:] - array([(b'zzz', 1, 4.2), (b'zzz', 2, 8.4), (b'ccc', 3, 12.6)], - dtype=[('foo', 'S3'), ('bar', '>> import zarr - >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) - - Set data for a selection of rows:: - - >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1) - >>> z[...] - array([[0, 0, 0, 0, 0], - [1, 1, 1, 1, 1], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [1, 1, 1, 1, 1]]) - - Set data for a selection of columns:: - - >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2) - >>> z[...] - array([[0, 2, 0, 0, 2], - [1, 2, 1, 1, 2], - [0, 2, 0, 0, 2], - [0, 2, 0, 0, 2], - [1, 2, 1, 1, 2]]) - - Set data for a selection of rows and columns:: - - >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3) - >>> z[...] - array([[0, 2, 0, 0, 2], - [1, 3, 1, 1, 3], - [0, 2, 0, 0, 2], - [0, 2, 0, 0, 2], - [1, 3, 1, 1, 3]]) - - For convenience, this functionality is also available via the `oindex` property. - E.g.:: - - >>> z.oindex[[1, 4], [1, 4]] = 4 - >>> z[...] 
- array([[0, 2, 0, 0, 2], - [1, 4, 1, 1, 4], - [0, 2, 0, 0, 2], - [0, 2, 0, 0, 2], - [1, 4, 1, 1, 4]]) - - Notes - ----- - Orthogonal indexing is also known as outer indexing. - - Slices with step > 1 are supported, but slices with negative step are not. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # guard conditions - if self._read_only: - raise ReadOnlyError() - - # refresh metadata - if not self._cache_metadata: - self._load_metadata_nosync() - - # setup indexer - indexer = OrthogonalIndexer(selection, self) - - self._set_selection(indexer, value, fields=fields) - - def set_coordinate_selection(self, selection, value, fields=None): - """Modify a selection of individual items, by providing the indices (coordinates) - for each item to be modified. - - Parameters - ---------- - selection : tuple - An integer (coordinate) array for each dimension of the array. - value : scalar or array-like - Value to be stored into the array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to set - data for. - - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) - - Set data for a selection of items:: - - >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1) - >>> z[...] - array([[0, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 1]]) - - For convenience, this functionality is also available via the `vindex` property. - E.g.:: - - >>> z.vindex[[1, 4], [1, 4]] = 2 - >>> z[...] - array([[0, 0, 0, 0, 0], - [0, 2, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 2]]) - - Notes - ----- - Coordinate indexing is also known as point selection, and is a form of vectorized - or inner indexing. - - Slices are not supported. Coordinate arrays must be provided for all dimensions - of the array. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # guard conditions - if self._read_only: - raise ReadOnlyError() - - # refresh metadata - if not self._cache_metadata: - self._load_metadata_nosync() - - # setup indexer - indexer = CoordinateIndexer(selection, self) - - # handle value - need ndarray-like flatten value - if not is_scalar(value, self._dtype): - try: - value = ensure_ndarray_like(value) - except TypeError: - # Handle types like `list` or `tuple` - value = np.array(value, like=self._meta_array) - if hasattr(value, "shape") and len(value.shape) > 1: - value = value.reshape(-1) - - self._set_selection(indexer, value, fields=fields) - - def set_block_selection(self, selection, value, fields=None): - """Modify a selection of individual blocks, by providing the chunk indices - (coordinates) for each block to be modified. - - Parameters - ---------- - selection : tuple - An integer (coordinate) or slice for each dimension of the array. - value : scalar or array-like - Value to be stored into the array. 
- fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to set - data for. - - Examples - -------- - Set up a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.zeros((6, 6), dtype=int, chunks=2) - - Set data for a selection of items:: - - >>> z.set_block_selection((1, 0), 1) - >>> z[...] - array([[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]]) - - For convenience, this functionality is also available via the `blocks` property. - E.g.:: - - >>> z.blocks[2, 1] = 4 - >>> z[...] - array([[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [0, 0, 4, 4, 0, 0], - [0, 0, 4, 4, 0, 0]]) - - >>> z.blocks[:, 2] = 7 - >>> z[...] - array([[0, 0, 0, 0, 7, 7], - [0, 0, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [1, 1, 0, 0, 7, 7], - [0, 0, 4, 4, 7, 7], - [0, 0, 4, 4, 7, 7]]) - - Notes - ----- - Block indexing is a convenience indexing method to work on individual chunks - with chunk index slicing. It has the same concept as Dask's `Array.blocks` - indexing. - - Slices are supported. However, only with a step size of one. - - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - # guard conditions - if self._read_only: - raise ReadOnlyError() - - # refresh metadata - if not self._cache_metadata: - self._load_metadata_nosync() - - # setup indexer - indexer = BlockIndexer(selection, self) - - self._set_selection(indexer, value, fields=fields) - - def set_mask_selection(self, selection, value, fields=None): - """Modify a selection of individual items, by providing a Boolean array of the - same shape as the array against which the selection is being made, where True - values indicate a selected item. - - Parameters - ---------- - selection : ndarray, bool - A Boolean array of the same shape as the array against which the selection is - being made. - value : scalar or array-like - Value to be stored into the array. - fields : str or sequence of str, optional - For arrays with a structured dtype, one or more fields can be specified to set - data for. - - Examples - -------- - Setup a 2-dimensional array:: - - >>> import zarr - >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) - - Set data for a selection of items:: - - >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[1, 1] = True - >>> sel[4, 4] = True - >>> z.set_mask_selection(sel, 1) - >>> z[...] - array([[0, 0, 0, 0, 0], - [0, 1, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 1]]) - - For convenience, this functionality is also available via the `vindex` property. - E.g.:: - - >>> z.vindex[sel] = 2 - >>> z[...] - array([[0, 0, 0, 0, 0], - [0, 2, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 2]]) - - Notes - ----- - Mask indexing is a form of vectorized or inner indexing, and is equivalent to - coordinate indexing. Internally the mask array is converted to coordinate - arrays by calling `np.nonzero`. 
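(To make the ``np.nonzero`` equivalence stated above concrete, the mask example from this docstring can be rewritten as a coordinate selection; reusing ``z`` and ``sel`` from the example, both calls write the same two elements)::

    sel = np.zeros_like(z, dtype=bool)
    sel[1, 1] = True
    sel[4, 4] = True
    z.set_mask_selection(sel, 1)
    z.set_coordinate_selection(np.nonzero(sel), 1)  # same effect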
- - See Also - -------- - get_basic_selection, set_basic_selection, get_mask_selection, - get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, get_block_selection, set_block_selection, - vindex, oindex, blocks, __getitem__, __setitem__ - - """ - - # guard conditions - if self._read_only: - raise ReadOnlyError() - - # refresh metadata - if not self._cache_metadata: - self._load_metadata_nosync() - - # setup indexer - indexer = MaskIndexer(selection, self) - - self._set_selection(indexer, value, fields=fields) - - def _set_basic_selection_zd(self, selection, value, fields=None): - # special case __setitem__ for zero-dimensional array - - # check selection is valid - selection = ensure_tuple(selection) - if selection not in ((), (Ellipsis,)): - err_too_many_indices(selection, self._shape) - - # check fields - check_fields(fields, self._dtype) - fields = check_no_multi_fields(fields) - - # obtain key for chunk - ckey = self._chunk_key((0,)) - - # setup chunk - try: - # obtain compressed data for chunk - cdata = self.chunk_store[ckey] - - except KeyError: - # chunk not initialized - chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) - if self._fill_value is not None: - chunk.fill(self._fill_value) - - else: - # decode chunk - chunk = self._decode_chunk(cdata).copy() - - # set value - if fields: - chunk[fields][selection] = value - else: - chunk[selection] = value - - # remove chunk if write_empty_chunks is false and it only contains the fill value - if (not self.write_empty_chunks) and all_equal(self.fill_value, chunk): - try: - del self.chunk_store[ckey] - return - except Exception: # pragma: no cover - # deleting failed, fallback to overwriting - pass - else: - # encode and store - cdata = self._encode_chunk(chunk) - self.chunk_store[ckey] = cdata - - def _set_basic_selection_nd(self, selection, value, fields=None): - # implementation of __setitem__ for array with at least one dimension - - # setup indexer - indexer = BasicIndexer(selection, self) - - self._set_selection(indexer, value, fields=fields) - - def _set_selection(self, indexer, value, fields=None): - # We iterate over all chunks which overlap the selection and thus contain data - # that needs to be replaced. Each chunk is processed in turn, extracting the - # necessary data from the value array and storing into the chunk array. - - # N.B., it is an important optimisation that we only visit chunks which overlap - # the selection. This minimises the number of iterations in the main for loop. 
- - # check fields are sensible - check_fields(fields, self._dtype) - fields = check_no_multi_fields(fields) - - # determine indices of chunks overlapping the selection - sel_shape = indexer.shape - - # check value shape - if sel_shape == (): - # setting a single item - pass - elif is_scalar(value, self._dtype): - # setting a scalar value - pass - else: - if not hasattr(value, "shape"): - value = np.asanyarray(value, like=self._meta_array) - check_array_shape("value", value, sel_shape) - - # iterate over chunks in range - if ( - not hasattr(self.chunk_store, "setitems") - or self._synchronizer is not None - or any(map(lambda x: x == 0, self.shape)) - ): - # iterative approach - for chunk_coords, chunk_selection, out_selection in indexer: - # extract data to store - if sel_shape == (): - chunk_value = value - elif is_scalar(value, self._dtype): - chunk_value = value - else: - chunk_value = value[out_selection] - # handle missing singleton dimensions - if indexer.drop_axes: - item = [slice(None)] * self.ndim - for a in indexer.drop_axes: - item[a] = np.newaxis - item = tuple(item) - chunk_value = chunk_value[item] - - # put data - self._chunk_setitem(chunk_coords, chunk_selection, chunk_value, fields=fields) - else: - lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) - chunk_values = [] - for out_selection in lout_selection: - if sel_shape == (): - chunk_values.append(value) - elif is_scalar(value, self._dtype): - chunk_values.append(value) - else: - cv = value[out_selection] - # handle missing singleton dimensions - if indexer.drop_axes: # pragma: no cover - item = [slice(None)] * self.ndim - for a in indexer.drop_axes: - item[a] = np.newaxis - item = tuple(item) - cv = chunk_value[item] - chunk_values.append(cv) - - self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, fields=fields) - - def _process_chunk( - self, - out, - cdata, - chunk_selection, - drop_axes, - out_is_ndarray, - fields, - out_selection, - partial_read_decode=False, - ): - """Take binary data from storage and fill output array""" - if ( - out_is_ndarray - and not fields - and is_contiguous_selection(out_selection) - and is_total_slice(chunk_selection, self._chunks) - and not self._filters - and self._dtype != object - ): - # For 0D arrays out_selection = () and out[out_selection] is a scalar - # Avoid that - dest = out[out_selection] if out_selection else out - # Assume that array-like objects that doesn't have a - # `writeable` flag is writable. 
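As the comments above note, `_set_selection` only visits chunks overlapping the selection and broadcasts a scalar value into each of them. A small sketch of the observable effect (illustrative; `nchunks_initialized` counts chunks actually present in the store):

    >>> z = zarr.zeros((10, 10), chunks=(5, 5), dtype=int)
    >>> z[0:5, 0:5] = 1   # overlaps exactly one chunk
    >>> z.nchunks_initialized
    1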
- dest_is_writable = getattr(dest, "writeable", True) - write_direct = dest_is_writable and ( - (self._order == "C" and dest.flags.c_contiguous) - or (self._order == "F" and dest.flags.f_contiguous) - ) - - if write_direct: - # optimization: we want the whole chunk, and the destination is - # contiguous, so we can decompress directly from the chunk - # into the destination array - if self._compressor: - if isinstance(cdata, PartialReadBuffer): - cdata = cdata.read_full() - self._compressor.decode(cdata, dest) - else: - if isinstance(cdata, UncompressedPartialReadBufferV3): - cdata = cdata.read_full() - chunk = ensure_ndarray_like(cdata).view(self._dtype) - # dest.shape is not self._chunks when a dimensions is squeezed out - # For example, assume self._chunks = (5, 5, 1) - # and the selection is [:, :, 0] - # Then out_selection is (slice(5), slice(5)) - # See https://github.com/zarr-developers/zarr-python/issues/1931 - chunk = chunk.reshape(dest.shape, order=self._order) - np.copyto(dest, chunk) - return - - # decode chunk - try: - if partial_read_decode: - cdata.prepare_chunk() - # size of chunk - tmp = np.empty_like(self._meta_array, shape=self._chunks, dtype=self.dtype) - index_selection = PartialChunkIterator(chunk_selection, self.chunks) - for start, nitems, partial_out_selection in index_selection: - expected_shape = [ - ( - len(range(*partial_out_selection[i].indices(self.chunks[0] + 1))) - if i < len(partial_out_selection) - else dim - ) - for i, dim in enumerate(self.chunks) - ] - if isinstance(cdata, UncompressedPartialReadBufferV3): - chunk_partial = self._decode_chunk( - cdata.read_part(start, nitems), - start=start, - nitems=nitems, - expected_shape=expected_shape, - ) - else: - cdata.read_part(start, nitems) - chunk_partial = self._decode_chunk( - cdata.buff, - start=start, - nitems=nitems, - expected_shape=expected_shape, - ) - tmp[partial_out_selection] = chunk_partial - out[out_selection] = tmp[chunk_selection] - return - except ArrayIndexError: - cdata = cdata.read_full() - chunk = self._decode_chunk(cdata) - - # select data from chunk - if fields: - chunk = chunk[fields] - tmp = chunk[chunk_selection] - if drop_axes: - tmp = np.squeeze(tmp, axis=drop_axes) - - # store selected data in output - out[out_selection] = tmp - - def _chunk_getitems( - self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes=None, fields=None - ): - """Obtain part or whole of chunks. - - Parameters - ---------- - chunk_coords : list of tuple of ints - Indices of the chunks. - chunk_selection : list of selections - Location of region within the chunks to extract. - out : ndarray - Array to store result in. - out_selection : list of selections - Location of regions within output array to store results in. - drop_axes : tuple of ints - Axes to squeeze out of the chunk. 
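The direct-decode path above only applies when a whole chunk is selected into a contiguous, writeable destination with no filters. A hedged sketch of supplying such a destination via the `out` argument of `get_basic_selection` (illustrative; whether the fast path is actually taken depends on the compressor and memory layout):

    >>> z = zarr.ones((10, 10), chunks=(5, 5), dtype='f8')
    >>> out = np.empty((5, 5), dtype='f8', order='C')
    >>> _ = z.get_basic_selection((slice(0, 5), slice(0, 5)), out=out)
    >>> float(out[0, 0])
    1.0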
- fields - TODO - """ - - out_is_ndarray = True - try: - out = ensure_ndarray_like(out) - except TypeError: # pragma: no cover - out_is_ndarray = False - - # Keys to retrieve - ckeys = [self._chunk_key(ch) for ch in lchunk_coords] - - # Check if we can do a partial read - if ( - self._partial_decompress - and self._compressor - and self._compressor.codec_id == "blosc" - and hasattr(self._compressor, "decode_partial") - and not fields - and self.dtype != object - and hasattr(self.chunk_store, "getitems") - ): - partial_read_decode = True - cdatas = { - ckey: PartialReadBuffer(ckey, self.chunk_store) - for ckey in ckeys - if ckey in self.chunk_store - } - elif ( - self._partial_decompress - and not self._compressor - and not fields - and self.dtype != object - and hasattr(self.chunk_store, "get_partial_values") - and self.chunk_store.supports_efficient_get_partial_values - ): - partial_read_decode = True - cdatas = { - ckey: UncompressedPartialReadBufferV3( - ckey, self.chunk_store, itemsize=self.itemsize - ) - for ckey in ckeys - if ckey in self.chunk_store - } - elif hasattr(self.chunk_store, "get_partial_values"): - partial_read_decode = False - values = self.chunk_store.get_partial_values([(ckey, (0, None)) for ckey in ckeys]) - cdatas = {key: value for key, value in zip(ckeys, values) if value is not None} - else: - partial_read_decode = False - contexts = {} - if not isinstance(self._meta_array, np.ndarray): - contexts = ConstantMap(ckeys, constant=Context(meta_array=self._meta_array)) - cdatas = self.chunk_store.getitems(ckeys, contexts=contexts) - - for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): - if ckey in cdatas: - self._process_chunk( - out, - cdatas[ckey], - chunk_select, - drop_axes, - out_is_ndarray, - fields, - out_select, - partial_read_decode=partial_read_decode, - ) - else: - # check exception type - if self._fill_value is not None: - if fields: - fill_value = self._fill_value[fields] - else: - fill_value = self._fill_value - out[out_select] = fill_value - - def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): - ckeys = map(self._chunk_key, lchunk_coords) - cdatas = { - key: self._process_for_setitem(key, sel, val, fields=fields) - for key, sel, val in zip(ckeys, lchunk_selection, values) - } - to_store = {} - if not self.write_empty_chunks: - empty_chunks = {k: v for k, v in cdatas.items() if all_equal(self.fill_value, v)} - self._chunk_delitems(empty_chunks.keys()) - nonempty_keys = cdatas.keys() - empty_chunks.keys() - to_store = {k: self._encode_chunk(cdatas[k]) for k in nonempty_keys} - else: - to_store = {k: self._encode_chunk(v) for k, v in cdatas.items()} - self.chunk_store.setitems(to_store) - - def _chunk_delitems(self, ckeys): - if hasattr(self.store, "delitems"): - self.store.delitems(ckeys) - else: # pragma: no cover - # exempting this branch from coverage as there are no extant stores - # that will trigger this condition, but it's possible that they - # will be developed in the future. - tuple(map(self._chunk_delitem, ckeys)) - - def _chunk_delitem(self, ckey): - """ - Attempt to delete the value associated with ckey. - """ - try: - del self.chunk_store[ckey] - except KeyError: - pass - - def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): - """Replace part or whole of a chunk. - - Parameters - ---------- - chunk_coords : tuple of ints - Indices of the chunk. - chunk_selection : tuple of slices - Location of region within the chunk. 
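`_chunk_setitems` above deletes (or skips storing) chunks that are uniformly equal to the fill value when `write_empty_chunks` is False. A minimal sketch of that behaviour (illustrative):

    >>> z = zarr.zeros((4, 4), chunks=(2, 2), dtype=int, write_empty_chunks=False)
    >>> z[0:2, 0:2] = 0   # all fill-value data, so nothing is stored
    >>> z.nchunks_initialized
    0
    >>> z[0:2, 0:2] = 5
    >>> z.nchunks_initialized
    1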
- value : scalar or ndarray - Value to set. - - """ - - if self._synchronizer is None: - # no synchronization - lock = nolock - else: - # synchronize on the chunk - ckey = self._chunk_key(chunk_coords) - lock = self._synchronizer[ckey] - - with lock: - self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, fields=fields) - - def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): - ckey = self._chunk_key(chunk_coords) - cdata = self._process_for_setitem(ckey, chunk_selection, value, fields=fields) - - # attempt to delete chunk if it only contains the fill value - if (not self.write_empty_chunks) and all_equal(self.fill_value, cdata): - self._chunk_delitem(ckey) - else: - self.chunk_store[ckey] = self._encode_chunk(cdata) - - def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): - if is_total_slice(chunk_selection, self._chunks) and not fields: - # totally replace chunk - - # optimization: we are completely replacing the chunk, so no need - # to access the existing chunk data - - if is_scalar(value, self._dtype): - # setup array filled with value - chunk = np.empty_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order - ) - chunk.fill(value) - - else: - # ensure array is contiguous - chunk = value.astype(self._dtype, order=self._order, copy=False) - - else: - # partially replace the contents of this chunk - - try: - # obtain compressed data for chunk - cdata = self.chunk_store[ckey] - - except KeyError: - # chunk not initialized - if self._fill_value is not None: - chunk = np.empty_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order - ) - chunk.fill(self._fill_value) - elif self._dtype == object: - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) - else: - # N.B., use zeros here so any region beyond the array has consistent - # and compressible data - chunk = np.zeros_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order - ) - - else: - # decode chunk - chunk = self._decode_chunk(cdata) - if not chunk.flags.writeable: - chunk = chunk.copy(order="K") - - # modify - if fields: - # N.B., currently multi-field assignment is not supported in numpy, so - # this only works for a single field - chunk[fields][chunk_selection] = value - else: - chunk[chunk_selection] = value - - return chunk - - def _chunk_key(self, chunk_coords): - if self._version == 3: - # _chunk_key() corresponds to data_key(P, i, j, ...) example in the spec - # where P = self._key_prefix, i, j, ... = chunk_coords - # e.g. 
c0/2/3 for 3d array with chunk index (0, 2, 3) - # https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids - return ( - "data/root/" - + self._key_prefix - + "c" - + self._dimension_separator.join(map(str, chunk_coords)) - ) - else: - return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) - - def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): - # decompress - if self._compressor: - # only decode requested items - if ( - all(x is not None for x in [start, nitems]) and self._compressor.codec_id == "blosc" - ) and hasattr(self._compressor, "decode_partial"): - chunk = self._compressor.decode_partial(cdata, start, nitems) - else: - chunk = self._compressor.decode(cdata) - else: - chunk = cdata - - # apply filters - if self._filters: - for f in reversed(self._filters): - chunk = f.decode(chunk) - - # view as numpy array with correct dtype - chunk = ensure_ndarray_like(chunk) - # special case object dtype, because incorrect handling can lead to - # segfaults and other bad things happening - if self._dtype != object: - chunk = chunk.view(self._dtype) - elif chunk.dtype != object: - # If we end up here, someone must have hacked around with the filters. - # We cannot deal with object arrays unless there is an object - # codec in the filter chain, i.e., a filter that converts from object - # array to something else during encoding, and converts back to object - # array during decoding. - raise RuntimeError("cannot read object array without object codec") - - # ensure correct chunk shape - chunk = chunk.reshape(-1, order="A") - chunk = chunk.reshape(expected_shape or self._chunks, order=self._order) - - return chunk - - def _encode_chunk(self, chunk): - # apply filters - if self._filters: - for f in self._filters: - chunk = f.encode(chunk) - - # check object encoding - if ensure_ndarray_like(chunk).dtype == object: - raise RuntimeError("cannot write object array without object codec") - - # compress - if self._compressor: - cdata = self._compressor.encode(chunk) - else: - cdata = chunk - - # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, KVStore) or isinstance(self._chunk_store, KVStore): - cdata = ensure_bytes(cdata) - - return cdata - - def __repr__(self): - t = type(self) - r = f"<{t.__module__}.{t.__name__}" - if self.name: - r += f" {self.name!r}" - r += f" {str(self.shape)}" - r += f" {self.dtype}" - if self._read_only: - r += " read-only" - r += ">" - return r - - @property - def info(self): - """Report some diagnostic information about the array. - - Examples - -------- - >>> import zarr - >>> z = zarr.zeros(1000000, chunks=100000, dtype='i4') - >>> z.info - Type : zarr.core.Array - Data type : int32 - Shape : (1000000,) - Chunk shape : (100000,) - Order : C - Read-only : False - Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : zarr.storage.KVStore - No. bytes : 4000000 (3.8M) - No. 
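A rough sketch of the two key layouts `_chunk_key` produces (illustrative; `_chunk_key` is a private helper, and the exact v3 prefix depends on the array path):

    >>> z = zarr.zeros((10, 30, 40), chunks=(5, 10, 10))   # v2 array, default '.' separator
    >>> z._chunk_key((0, 2, 3))
    '0.2.3'
    >>> # under the experimental v3 layout, an array at path 'arr' would instead
    >>> # use something like 'data/root/arr/c0/2/3'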
bytes stored : 320 - Storage ratio : 12500.0 - Chunks initialized : 0/10 - - """ - return InfoReporter(self) - - def info_items(self): - return self._synchronized_op(self._info_items_nosync) - - def _info_items_nosync(self): - def typestr(o): - return f"{type(o).__module__}.{type(o).__name__}" - - def bytestr(n): - if n > 2**10: - return f"{n} ({human_readable_size(n)})" - else: - return str(n) - - items = [] - - # basic info - if self.name is not None: - items += [("Name", self.name)] - items += [ - ("Type", typestr(self)), - ("Data type", str(self.dtype)), - ("Shape", str(self.shape)), - ("Chunk shape", str(self.chunks)), - ("Order", self.order), - ("Read-only", str(self.read_only)), - ] - - # filters - if self.filters: - for i, f in enumerate(self.filters): - items += [(f"Filter [{i}]", repr(f))] - - # compressor - items += [("Compressor", repr(self.compressor))] - - # synchronizer - if self._synchronizer is not None: - items += [("Synchronizer type", typestr(self._synchronizer))] - - # storage info - nbytes = self.nbytes - nbytes_stored = self.nbytes_stored - items += [("Store type", typestr(self._store))] - if self._chunk_store is not None: - items += [("Chunk store type", typestr(self._chunk_store))] - items += [("No. bytes", bytestr(nbytes))] - if nbytes_stored > 0: - items += [ - ("No. bytes stored", bytestr(nbytes_stored)), - ("Storage ratio", f"{nbytes / nbytes_stored:.1f}"), - ] - items += [("Chunks initialized", f"{self.nchunks_initialized}/{self.nchunks}")] - - return items - - def digest(self, hashname="sha1"): - """ - Compute a checksum for the data. Default uses sha1 for speed. - - Examples - -------- - >>> import binascii - >>> import zarr - >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) - >>> binascii.hexlify(z.digest()) - b'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) - >>> binascii.hexlify(z.digest()) - b'7162d416d26a68063b66ed1f30e0a866e4abed60' - >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) - >>> binascii.hexlify(z.digest()) - b'cb387af37410ae5a3222e893cf3373e4e4f22816' - """ - - h = hashlib.new(hashname) - - for i in itertools.product(*[range(s) for s in self.cdata_shape]): - h.update(self.chunk_store.get(self._chunk_key(i), b"")) - - mkey = _prefix_to_array_key(self._store, self._key_prefix) - h.update(self.store.get(mkey, b"")) - - h.update(self.store.get(self.attrs.key, b"")) - - checksum = h.digest() - - return checksum - - def hexdigest(self, hashname="sha1"): - """ - Compute a checksum for the data. Default uses sha1 for speed. - - Examples - -------- - >>> import zarr - >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) - >>> z.hexdigest() - '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) - >>> z.hexdigest() - '7162d416d26a68063b66ed1f30e0a866e4abed60' - >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) - >>> z.hexdigest() - 'cb387af37410ae5a3222e893cf3373e4e4f22816' - """ - - checksum = binascii.hexlify(self.digest(hashname=hashname)) - - # This is a bytes object on Python 3 and we want a str. 
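`hexdigest` is simply the hex-encoded form of `digest`, as the code above shows. A minimal sketch (illustrative):

    >>> import binascii
    >>> import numpy as np
    >>> import zarr
    >>> a = zarr.array(np.arange(100, dtype='i4'), chunks=10)
    >>> binascii.hexlify(a.digest()).decode('utf8') == a.hexdigest()
    True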
- if not isinstance(checksum, str): - checksum = checksum.decode("utf8") - - return checksum - - def __getstate__(self): - return { - "store": self._store, - "path": self._path, - "read_only": self._read_only, - "chunk_store": self._chunk_store, - "synchronizer": self._synchronizer, - "cache_metadata": self._cache_metadata, - "cache_attrs": self._attrs.cache, - "partial_decompress": self._partial_decompress, - "write_empty_chunks": self._write_empty_chunks, - "zarr_version": self._version, - "meta_array": self._meta_array, - } - - def __setstate__(self, state): - self.__init__(**state) - - def _synchronized_op(self, f, *args, **kwargs): - if self._synchronizer is None: - # no synchronization - lock = nolock - - else: - # synchronize on the array - mkey = _prefix_to_array_key(self._store, self._key_prefix) - lock = self._synchronizer[mkey] - - with lock: - self._refresh_metadata_nosync() - result = f(*args, **kwargs) - - return result - - def _write_op(self, f, *args, **kwargs): - # guard condition - if self._read_only: - raise ReadOnlyError() - - return self._synchronized_op(f, *args, **kwargs) - - def resize(self, *args): - """Change the shape of the array by growing or shrinking one or more - dimensions. - - Examples - -------- - >>> import zarr - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) - >>> z.shape - (10000, 10000) - >>> z.resize(20000, 10000) - >>> z.shape - (20000, 10000) - >>> z.resize(30000, 1000) - >>> z.shape - (30000, 1000) - - Notes - ----- - When resizing an array, the data are not rearranged in any way. - - If one or more dimensions are shrunk, any chunks falling outside the - new array shape will be deleted from the underlying store. - However, it is noteworthy that the chunks partially falling inside the new array - (i.e. boundary chunks) will remain intact, and therefore, - the data falling outside the new array but inside the boundary chunks - would be restored by a subsequent resize operation that grows the array size. - - """ - - return self._write_op(self._resize_nosync, *args) - - def _resize_nosync(self, *args): - # normalize new shape argument - old_shape = self._shape - new_shape = normalize_resize_args(old_shape, *args) - old_cdata_shape = self._cdata_shape - - # update metadata - self._shape = new_shape - self._flush_metadata_nosync() - - # determine the new number and arrangement of chunks - chunks = self._chunks - new_cdata_shape = tuple(math.ceil(s / c) for s, c in zip(new_shape, chunks)) - - # remove any chunks not within range - # The idea is that, along each dimension, - # only find and remove the chunk slices that exist in 'old' but not 'new' data. - # Note that a mutable list ('old_cdata_shape_working_list') is introduced here - # to dynamically adjust the number of chunks along the already-processed dimensions - # in order to avoid duplicate chunk removal. - chunk_store = self.chunk_store - old_cdata_shape_working_list = list(old_cdata_shape) - for idx_cdata, (val_old_cdata, val_new_cdata) in enumerate( - zip(old_cdata_shape_working_list, new_cdata_shape) - ): - for cidx in itertools.product( - *[ - range(n_new, n_old) if (idx == idx_cdata) else range(n_old) - for idx, (n_old, n_new) in enumerate( - zip(old_cdata_shape_working_list, new_cdata_shape) - ) - ] - ): - key = self._chunk_key(cidx) - try: - del chunk_store[key] - except KeyError: - # chunk not initialized - pass - old_cdata_shape_working_list[idx_cdata] = min(val_old_cdata, val_new_cdata) - - def append(self, data, axis=0): - """Append `data` to `axis`. 
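The resize Notes above point out that data in boundary chunks survive a shrink and reappear if the array is grown again. A small sketch of that behaviour (illustrative):

    >>> z = zarr.zeros((10,), chunks=(5,), dtype=int)
    >>> z[:] = np.arange(10)
    >>> z.resize(7)    # chunk 1 becomes a boundary chunk and is kept in the store
    >>> z.resize(10)
    >>> z[:]
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])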
- - Parameters - ---------- - data : array-like - Data to be appended. - axis : int - Axis along which to append. - - Returns - ------- - new_shape : tuple - - Notes - ----- - The size of all dimensions other than `axis` must match between this - array and `data`. - - Examples - -------- - >>> import numpy as np - >>> import zarr - >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) - >>> z = zarr.array(a, chunks=(1000, 100)) - >>> z.shape - (10000, 1000) - >>> z.append(a) - (20000, 1000) - >>> z.append(np.vstack([a, a]), axis=1) - (20000, 2000) - >>> z.shape - (20000, 2000) - - """ - return self._write_op(self._append_nosync, data, axis=axis) - - def _append_nosync(self, data, axis=0): - # ensure data is array-like - if not hasattr(data, "shape"): - data = np.asanyarray(data, like=self._meta_array) - - # ensure shapes are compatible for non-append dimensions - self_shape_preserved = tuple(s for i, s in enumerate(self._shape) if i != axis) - data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) - if self_shape_preserved != data_shape_preserved: - raise ValueError( - "shape of data to append is not compatible with the array; " - "all dimensions must match except for the dimension being " - "appended" - ) - - # remember old shape - old_shape = self._shape - - # determine new shape - new_shape = tuple( - self._shape[i] if i != axis else self._shape[i] + data.shape[i] - for i in range(len(self._shape)) - ) - - # resize - self._resize_nosync(new_shape) - - # store data - # noinspection PyTypeChecker - append_selection = tuple( - slice(None) if i != axis else slice(old_shape[i], new_shape[i]) - for i in range(len(self._shape)) - ) - self[append_selection] = data - - return new_shape - - def view( - self, - shape=None, - chunks=None, - dtype=None, - fill_value=None, - filters=None, - read_only=None, - synchronizer=None, - ): - """Return an array sharing the same data. - - Parameters - ---------- - shape : int or tuple of ints - Array shape. - chunks : int or tuple of ints, optional - Chunk shape. - dtype : string or dtype, optional - NumPy dtype. - fill_value : object - Default value to use for uninitialized portions of the array. - filters : sequence, optional - Sequence of filters to use to encode chunk data prior to - compression. - read_only : bool, optional - True if array should be protected against modification. - synchronizer : object, optional - Array synchronizer. - - Notes - ----- - WARNING: This is an experimental feature and should be used with care. - There are plenty of ways to generate errors and/or cause data - corruption. - - Examples - -------- - - Bypass filters: - - >>> import zarr - >>> import numpy as np - >>> np.random.seed(42) - >>> labels = ['female', 'male'] - >>> data = np.random.choice(labels, size=10000) - >>> filters = [zarr.Categorize(labels=labels, - ... dtype=data.dtype, - ... 
astype='u1')] - >>> a = zarr.array(data, chunks=1000, filters=filters) - >>> a[:] - array(['female', 'male', 'female', ..., 'male', 'male', 'female'], - dtype='>> v = a.view(dtype='u1', filters=[]) - >>> v.is_view - True - >>> v[:] - array([1, 2, 1, ..., 2, 2, 1], dtype=uint8) - - Views can be used to modify data: - - >>> x = v[:] - >>> x.sort() - >>> v[:] = x - >>> v[:] - array([1, 1, 1, ..., 2, 2, 2], dtype=uint8) - >>> a[:] - array(['female', 'female', 'female', ..., 'male', 'male', 'male'], - dtype='>> data = np.random.randint(0, 2, size=10000, dtype='u1') - >>> a = zarr.array(data, chunks=1000) - >>> a[:] - array([0, 0, 1, ..., 1, 0, 0], dtype=uint8) - >>> v = a.view(dtype=bool) - >>> v[:] - array([False, False, True, ..., True, False, False]) - >>> np.all(a[:].view(dtype=bool) == v[:]) - np.True_ - - An array can be viewed with a dtype with a different item size, however - some care is needed to adjust the shape and chunk shape so that chunk - data is interpreted correctly: - - >>> data = np.arange(10000, dtype='u2') - >>> a = zarr.array(data, chunks=1000) - >>> a[:10] - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16) - >>> v = a.view(dtype='u1', shape=20000, chunks=2000) - >>> v[:10] - array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8) - >>> np.all(a[:].view('u1') == v[:]) - np.True_ - - Change fill value for uninitialized chunks: - - >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1') - >>> a[:] - array([-1, -1, -1, ..., -1, -1, -1], dtype=int8) - >>> v = a.view(fill_value=42) - >>> v[:] - array([42, 42, 42, ..., 42, 42, 42], dtype=int8) - - Note that resizing or appending to views is not permitted: - - >>> a = zarr.empty(10000) - >>> v = a.view() - >>> try: - ... v.resize(20000) - ... except PermissionError as e: - ... print(e) - operation not permitted for views - - """ - - store = self._store - chunk_store = self._chunk_store - path = self._path - if read_only is None: - read_only = self._read_only - if synchronizer is None: - synchronizer = self._synchronizer - a = Array( - store=store, - path=path, - chunk_store=chunk_store, - read_only=read_only, - synchronizer=synchronizer, - cache_metadata=True, - zarr_version=self._version, - ) - a._is_view = True - - # allow override of some properties - if dtype is None: - dtype = self._dtype - else: - dtype = np.dtype(dtype) - a._dtype = dtype - if shape is None: - shape = self._shape - else: - shape = normalize_shape(shape) - a._shape = shape - if chunks is not None: - chunks = normalize_chunks(chunks, shape, dtype.itemsize) - a._chunks = chunks - if fill_value is not None: - a._fill_value = fill_value - if filters is not None: - a._filters = filters - - return a - - def astype(self, dtype): - """Returns a view that does on the fly type conversion of the underlying data. - - Parameters - ---------- - dtype : string or dtype - NumPy dtype. - - Notes - ----- - This method returns a new Array object which is a view on the same - underlying chunk data. Modifying any data via the view is currently - not permitted and will result in an error. This is an experimental - feature and its behavior is subject to change in the future. 
- - See Also - -------- - Array.view - - Examples - -------- - - >>> import zarr - >>> import numpy as np - >>> data = np.arange(100, dtype=np.uint8) - >>> a = zarr.array(data, chunks=10) - >>> a[:] - array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99], dtype=uint8) - >>> v = a.astype(np.float32) - >>> v.is_view - True - >>> v[:] - array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., - 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., - 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., - 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., - 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., - 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., - 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., - 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., - 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., - 90., 91., 92., 93., 94., 95., 96., 97., 98., 99.], - dtype=float32) - """ - - dtype = np.dtype(dtype) - - filters = [] - if self._filters: - filters.extend(self._filters) - filters.insert(0, AsType(encode_dtype=self._dtype, decode_dtype=dtype)) - - return self.view(filters=filters, dtype=dtype, read_only=True) diff --git a/zarr/creation.py b/zarr/creation.py deleted file mode 100644 index f7f3d5a094..0000000000 --- a/zarr/creation.py +++ /dev/null @@ -1,755 +0,0 @@ -from collections.abc import MutableMapping -from typing import Optional, Tuple, Union, Sequence -from warnings import warn - -import numpy as np -import numpy.typing as npt -from numcodecs.abc import Codec -from numcodecs.registry import codec_registry - -from zarr._storage.store import DEFAULT_ZARR_VERSION -from zarr.core import Array -from zarr.errors import ( - ArrayNotFoundError, - ContainsArrayError, - ContainsGroupError, -) -from zarr.storage import ( - contains_array, - contains_group, - default_compressor, - init_array, - normalize_storage_path, - normalize_store_arg, -) -from zarr._storage.store import StorageTransformer -from zarr.sync import Synchronizer -from zarr.types import ZARR_VERSION, DIMENSION_SEPARATOR, MEMORY_ORDER, MetaArray, PathLike -from zarr.util import normalize_dimension_separator - - -def create( - shape: Union[int, Tuple[int, ...]], - chunks: Union[int, Tuple[int, ...], bool] = True, - dtype: Optional[npt.DTypeLike] = None, - compressor="default", - fill_value: Optional[int] = 0, - order: MEMORY_ORDER = "C", - store: Optional[Union[str, MutableMapping]] = None, - synchronizer: Optional[Synchronizer] = None, - overwrite: bool = False, - path: Optional[PathLike] = None, - chunk_store: Optional[MutableMapping] = None, - filters: Optional[Sequence[Codec]] = None, - cache_metadata: bool = True, - cache_attrs: bool = True, - read_only: bool = False, - object_codec: Optional[Codec] = None, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - write_empty_chunks: bool = True, - *, - zarr_version: Optional[ZARR_VERSION] = None, - meta_array: Optional[MetaArray] = None, - storage_transformers: Sequence[StorageTransformer] = (), - **kwargs, -): - """Create an array. - - Parameters - ---------- - shape : int or tuple of ints - Array shape. - chunks : int or tuple of ints, optional - Chunk shape. If True, will be guessed from `shape` and `dtype`. 
If - False, will be set to `shape`, i.e., single chunk for the whole array. - If an int, the chunk size in each dimension will be given by the value - of `chunks`. Default is True. - dtype : string or dtype, optional - NumPy dtype. - compressor : Codec, optional - Primary compressor. - fill_value : object - Default value to use for uninitialized portions of the array. - order : {'C', 'F'}, optional - Memory layout to be used within each chunk. - store : MutableMapping or string - Store or path to directory in file system or name of zip file. - synchronizer : object, optional - Array synchronizer. - overwrite : bool, optional - If True, delete all pre-existing data in `store` at `path` before - creating the array. - path : string, optional - Path under which array is stored. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. - filters : sequence of Codecs, optional - Sequence of filters to use to encode chunk data prior to compression. - cache_metadata : bool, optional - If True, array configuration metadata will be cached for the - lifetime of the object. If False, array metadata will be reloaded - prior to all data access and modification operations (may incur - overhead depending on storage and data access pattern). - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - read_only : bool, optional - True if array should be protected against modification. - object_codec : Codec, optional - A codec to encode object arrays, only needed if dtype=object. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - .. versionadded:: 2.8 - - write_empty_chunks : bool, optional - If True (default), all chunks will be stored regardless of their - contents. If False, each chunk is compared to the array's fill value - prior to storing. If a chunk is uniformly equal to the fill value, then - that chunk is not be stored, and the store entry for that chunk's key - is deleted. This setting enables sparser storage, as only chunks with - non-fill-value data are stored, at the expense of overhead associated - with checking the data of each chunk. - - .. versionadded:: 2.11 - - storage_transformers : sequence of StorageTransformers, optional - Setting storage transformers, changes the storage structure and behaviour - of data coming from the underlying store. The transformers are applied in the - order of the given sequence. Supplying an empty sequence is the same as omitting - the argument or setting it to None. May only be set when using zarr_version 3. - - .. versionadded:: 2.13 - - zarr_version : {None, 2, 3}, optional - The zarr protocol version of the created array. If None, it will be - inferred from ``store`` or ``chunk_store`` if they are provided, - otherwise defaulting to 2. - - .. versionadded:: 2.12 - - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. 
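A minimal sketch of the three forms the `chunks` argument described above can take (illustrative; the guessed chunk shape depends on zarr's heuristics):

    >>> zarr.create((10000, 10000), chunks=(1000, 1000)).chunks   # explicit chunk shape
    (1000, 1000)
    >>> zarr.create((10000, 10000), chunks=False).chunks          # False: one chunk for the whole array
    (10000, 10000)
    >>> z = zarr.create((10000, 10000), chunks=True)              # True: guessed from shape and dtype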
versionadded:: 2.13 - - Returns - ------- - z : zarr.core.Array - - Examples - -------- - - Create an array with default settings:: - - >>> import zarr - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000)) - >>> z - - - Create an array with different some different configuration options:: - - >>> from numcodecs import Blosc - >>> compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.BITSHUFFLE) - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='i1', order='F', - ... compressor=compressor) - >>> z - - - To create an array with object dtype requires a filter that can handle Python object - encoding, e.g., `MsgPack` or `Pickle` from `numcodecs`:: - - >>> from numcodecs import MsgPack - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype=object, - ... object_codec=MsgPack()) - >>> z - - - Example with some filters, and also storing chunks separately from metadata:: - - >>> from numcodecs import Quantize, Adler32 - >>> store, chunk_store = dict(), dict() - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='f8', - ... filters=[Quantize(digits=2, dtype='f8'), Adler32()], - ... store=store, chunk_store=chunk_store) - >>> z - - - """ - if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) - - # handle polymorphic store arg - store = normalize_store_arg(store, zarr_version=zarr_version, mode="w") - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - # API compatibility with h5py - compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) - - # optional array metadata - if dimension_separator is None: - dimension_separator = getattr(store, "_dimension_separator", None) - else: - store_separator = getattr(store, "_dimension_separator", None) - if store_separator not in (None, dimension_separator): - raise ValueError( - f"Specified dimension_separator: {dimension_separator}" - f"conflicts with store's separator: " - f"{store_separator}" - ) - dimension_separator = normalize_dimension_separator(dimension_separator) - - if zarr_version > 2 and path is None: - path = "/" - - # initialize array metadata - init_array( - store, - shape=shape, - chunks=chunks, - dtype=dtype, - compressor=compressor, - fill_value=fill_value, - order=order, - overwrite=overwrite, - path=path, - chunk_store=chunk_store, - filters=filters, - object_codec=object_codec, - dimension_separator=dimension_separator, - storage_transformers=storage_transformers, - ) - - # instantiate array - z = Array( - store, - path=path, - chunk_store=chunk_store, - synchronizer=synchronizer, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - read_only=read_only, - write_empty_chunks=write_empty_chunks, - meta_array=meta_array, - ) - - return z - - -def _kwargs_compat(compressor, fill_value, kwargs): - # to be compatible with h5py, as well as backwards-compatible with Zarr - # 1.x, accept 'compression' and 'compression_opts' keyword arguments - - if compressor != "default": - # 'compressor' overrides 'compression' - if "compression" in kwargs: - warn( - "'compression' keyword argument overridden by 'compressor'", - stacklevel=3, - ) - del kwargs["compression"] - if "compression_opts" in kwargs: - warn( - "'compression_opts' keyword argument overridden by 'compressor'", - stacklevel=3, - ) - del kwargs["compression_opts"] - - elif "compression" in kwargs: - compression = kwargs.pop("compression") - compression_opts = kwargs.pop("compression_opts", None) - - if compression is None or compression 
== "none": - compressor = None - - elif compression == "default": - compressor = default_compressor - - elif isinstance(compression, str): - codec_cls = codec_registry[compression] - - # handle compression_opts - if isinstance(compression_opts, dict): - compressor = codec_cls(**compression_opts) - elif isinstance(compression_opts, (list, tuple)): - compressor = codec_cls(*compression_opts) - elif compression_opts is None: - compressor = codec_cls() - else: - # assume single argument, e.g., int - compressor = codec_cls(compression_opts) - - # be lenient here if user gives compressor as 'compression' - elif hasattr(compression, "get_config"): - compressor = compression - - else: - raise ValueError(f"bad value for compression: {compression!r}") - - # handle 'fillvalue' - if "fillvalue" in kwargs: - # to be compatible with h5py, accept 'fillvalue' instead of - # 'fill_value' - fill_value = kwargs.pop("fillvalue") - - # ignore other keyword arguments - for k in kwargs: - warn(f"ignoring keyword argument {k!r}", stacklevel=2) - - return compressor, fill_value - - -def empty(shape, **kwargs): - """Create an empty array. - - For parameter definitions see :func:`zarr.creation.create`. - - Notes - ----- - The contents of an empty Zarr array are not defined. On attempting to - retrieve data from an empty Zarr array, any values may be returned, - and these are not guaranteed to be stable from one access to the next. - - """ - return create(shape=shape, fill_value=None, **kwargs) - - -def zeros(shape, **kwargs): - """Create an array, with zero being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.creation.create`. - - Examples - -------- - >>> import zarr - >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000)) - >>> z - - >>> z[:2, :2] - array([[0., 0.], - [0., 0.]]) - - """ - - return create(shape=shape, fill_value=0, **kwargs) - - -def ones(shape, **kwargs): - """Create an array, with one being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.creation.create`. - - Examples - -------- - >>> import zarr - >>> z = zarr.ones((10000, 10000), chunks=(1000, 1000)) - >>> z - - >>> z[:2, :2] - array([[1., 1.], - [1., 1.]]) - - """ - - return create(shape=shape, fill_value=1, **kwargs) - - -def full(shape, fill_value, **kwargs): - """Create an array, with `fill_value` being used as the default value for - uninitialized portions of the array. - - For parameter definitions see :func:`zarr.creation.create`. - - Examples - -------- - >>> import zarr - >>> z = zarr.full((10000, 10000), chunks=(1000, 1000), fill_value=42) - >>> z - - >>> z[:2, :2] - array([[42., 42.], - [42., 42.]]) - - """ - - return create(shape=shape, fill_value=fill_value, **kwargs) - - -def _get_shape_chunks(a): - shape = None - chunks = None - - if hasattr(a, "shape") and isinstance(a.shape, tuple): - shape = a.shape - - if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): - chunks = a.chunks - - elif hasattr(a, "chunklen"): - # bcolz carray - chunks = (a.chunklen,) + a.shape[1:] - - return shape, chunks - - -def array(data, **kwargs): - """Create an array filled with `data`. - - The `data` argument should be a NumPy array or array-like object. For - other parameter definitions see :func:`zarr.creation.create`. 
- - Examples - -------- - >>> import numpy as np - >>> import zarr - >>> a = np.arange(100000000).reshape(10000, 10000) - >>> z = zarr.array(a, chunks=(1000, 1000)) - >>> z - - - """ - - # ensure data is array-like - if not hasattr(data, "shape") or not hasattr(data, "dtype"): - data = np.asanyarray(data) - - # setup dtype - kw_dtype = kwargs.get("dtype") - if kw_dtype is None: - kwargs["dtype"] = data.dtype - else: - kwargs["dtype"] = kw_dtype - - # setup shape and chunks - data_shape, data_chunks = _get_shape_chunks(data) - kwargs["shape"] = data_shape - kw_chunks = kwargs.get("chunks") - if kw_chunks is None: - kwargs["chunks"] = data_chunks - else: - kwargs["chunks"] = kw_chunks - - # pop read-only to apply after storing the data - read_only = kwargs.pop("read_only", False) - - # instantiate array - z = create(**kwargs) - - # fill with data - z[...] = data - - # set read_only property afterwards - z.read_only = read_only - - return z - - -def open_array( - store=None, - mode="a", - shape=None, - chunks=True, - dtype=None, - compressor="default", - fill_value=0, - order="C", - synchronizer=None, - filters=None, - cache_metadata=True, - cache_attrs=True, - path=None, - object_codec=None, - chunk_store=None, - storage_options=None, - partial_decompress=False, - write_empty_chunks=True, - *, - zarr_version=None, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - meta_array=None, - **kwargs, -): - """Open an array using file-mode-like semantics. - - Parameters - ---------- - store : MutableMapping or string, optional - Store or path to directory in file system or name of zip file. - mode : {'r', 'r+', 'a', 'w', 'w-'}, optional - Persistence mode: 'r' means read only (must exist); 'r+' means - read/write (must exist); 'a' means read/write (create if doesn't - exist); 'w' means create (overwrite if exists); 'w-' means create - (fail if exists). - shape : int or tuple of ints, optional - Array shape. - chunks : int or tuple of ints, optional - Chunk shape. If True, will be guessed from `shape` and `dtype`. If - False, will be set to `shape`, i.e., single chunk for the whole array. - If an int, the chunk size in each dimension will be given by the value - of `chunks`. Default is True. - dtype : string or dtype, optional - NumPy dtype. - compressor : Codec, optional - Primary compressor. - fill_value : object, optional - Default value to use for uninitialized portions of the array. - order : {'C', 'F'}, optional - Memory layout to be used within each chunk. - synchronizer : object, optional - Array synchronizer. - filters : sequence, optional - Sequence of filters to use to encode chunk data prior to compression. - cache_metadata : bool, optional - If True, array configuration metadata will be cached for the - lifetime of the object. If False, array metadata will be reloaded - prior to all data access and modification operations (may incur - overhead depending on storage and data access pattern). - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - path : string, optional - Array path within store. - object_codec : Codec, optional - A codec to encode object arrays, only needed if dtype=object. - chunk_store : MutableMapping or string, optional - Store or path to directory in file system or name of zip file. 
- storage_options : dict - If using an fsspec URL to create the store, these will be passed to - the backend implementation. Ignored otherwise. - partial_decompress : bool, optional - If True and while the chunk_store is a FSStore and the compression used - is Blosc, when getting data from the array chunks will be partially - read and decompressed when possible. - write_empty_chunks : bool, optional - If True (default), all chunks will be stored regardless of their - contents. If False, each chunk is compared to the array's fill value - prior to storing. If a chunk is uniformly equal to the fill value, then - that chunk is not be stored, and the store entry for that chunk's key - is deleted. This setting enables sparser storage, as only chunks with - non-fill-value data are stored, at the expense of overhead associated - with checking the data of each chunk. - - .. versionadded:: 2.11 - - zarr_version : {None, 2, 3}, optional - The zarr protocol version of the array to be opened. If None, it will - be inferred from ``store`` or ``chunk_store`` if they are provided, - otherwise defaulting to 2. - dimension_separator : {None, '.', '/'}, optional - Can be used to specify whether the array is in a flat ('.') or nested - ('/') format. If None, the appropriate value will be read from `store` - when present. Otherwise, defaults to '.' when ``zarr_version == 2`` - and `/` otherwise. - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. versionadded:: 2.15 - - Returns - ------- - z : zarr.core.Array - - Examples - -------- - >>> import numpy as np - >>> import zarr - >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000), - ... chunks=(1000, 1000), fill_value=0) - >>> z1[:] = np.arange(100000000).reshape(10000, 10000) - >>> z1 - - >>> z2 = zarr.open_array('data/example.zarr', mode='r') - >>> z2 - - >>> np.all(z1[:] == z2[:]) - np.True_ - - Notes - ----- - There is no need to close an array. Data are automatically flushed to the - file system. - - """ - - # use same mode semantics as h5py - # r : read only, must exist - # r+ : read/write, must exist - # w : create, delete if exists - # w- or x : create, fail if exists - # a : read/write if exists, create otherwise (default) - - if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) - - # handle polymorphic store arg - store = normalize_store_arg( - store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - if chunk_store is not None: - chunk_store = normalize_store_arg( - chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - - # respect the dimension separator specified in a store, if present - if dimension_separator is None: - if hasattr(store, "_dimension_separator"): - dimension_separator = store._dimension_separator - else: - dimension_separator = "." if zarr_version == 2 else "/" - - if zarr_version == 3 and path is None: - path = "array" # TODO: raise ValueError instead? 
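A minimal sketch of the file-mode semantics listed above, using a plain in-memory dict as the store (illustrative):

    >>> store = dict()
    >>> z = zarr.open_array(store=store, mode='w', shape=(10,), chunks=(5,), dtype='i4')
    >>> z[:] = np.arange(10)
    >>> z2 = zarr.open_array(store=store, mode='r')
    >>> z2.read_only
    True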
- - path = normalize_storage_path(path) - - # API compatibility with h5py - compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) - - # ensure fill_value of correct type - if fill_value is not None: - fill_value = np.array(fill_value, dtype=dtype)[()] - - # ensure store is initialized - - if mode in ["r", "r+"]: - if not contains_array(store, path=path): - if contains_group(store, path=path): - raise ContainsGroupError(path) - raise ArrayNotFoundError(path) - - elif mode == "w": - init_array( - store, - shape=shape, - chunks=chunks, - dtype=dtype, - compressor=compressor, - fill_value=fill_value, - order=order, - filters=filters, - overwrite=True, - path=path, - object_codec=object_codec, - chunk_store=chunk_store, - dimension_separator=dimension_separator, - ) - - elif mode == "a": - if not contains_array(store, path=path): - if contains_group(store, path=path): - raise ContainsGroupError(path) - init_array( - store, - shape=shape, - chunks=chunks, - dtype=dtype, - compressor=compressor, - fill_value=fill_value, - order=order, - filters=filters, - path=path, - object_codec=object_codec, - chunk_store=chunk_store, - dimension_separator=dimension_separator, - ) - - elif mode in ["w-", "x"]: - if contains_group(store, path=path): - raise ContainsGroupError(path) - elif contains_array(store, path=path): - raise ContainsArrayError(path) - else: - init_array( - store, - shape=shape, - chunks=chunks, - dtype=dtype, - compressor=compressor, - fill_value=fill_value, - order=order, - filters=filters, - path=path, - object_codec=object_codec, - chunk_store=chunk_store, - dimension_separator=dimension_separator, - ) - - # determine read only status - read_only = mode == "r" - - # instantiate array - z = Array( - store, - read_only=read_only, - synchronizer=synchronizer, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - path=path, - chunk_store=chunk_store, - write_empty_chunks=write_empty_chunks, - meta_array=meta_array, - ) - - return z - - -def _like_args(a, kwargs): - shape, chunks = _get_shape_chunks(a) - if shape is not None: - kwargs.setdefault("shape", shape) - if chunks is not None: - kwargs.setdefault("chunks", chunks) - - if hasattr(a, "dtype"): - kwargs.setdefault("dtype", a.dtype) - - if isinstance(a, Array): - kwargs.setdefault("compressor", a.compressor) - kwargs.setdefault("order", a.order) - kwargs.setdefault("filters", a.filters) - kwargs.setdefault("zarr_version", a._version) - else: - kwargs.setdefault("compressor", "default") - kwargs.setdefault("order", "C") - - -def empty_like(a, **kwargs): - """Create an empty array like `a`.""" - _like_args(a, kwargs) - return empty(**kwargs) - - -def zeros_like(a, **kwargs): - """Create an array of zeros like `a`.""" - _like_args(a, kwargs) - return zeros(**kwargs) - - -def ones_like(a, **kwargs): - """Create an array of ones like `a`.""" - _like_args(a, kwargs) - return ones(**kwargs) - - -def full_like(a, **kwargs): - """Create a filled array like `a`.""" - _like_args(a, kwargs) - if isinstance(a, Array): - kwargs.setdefault("fill_value", a.fill_value) - return full(**kwargs) - - -def open_like(a, path, **kwargs): - """Open a persistent array like `a`.""" - _like_args(a, kwargs) - if isinstance(a, Array): - kwargs.setdefault("fill_value", a.fill_value) - return open_array(path, **kwargs) diff --git a/zarr/errors.py b/zarr/errors.py deleted file mode 100644 index 85789fbcbf..0000000000 --- a/zarr/errors.py +++ /dev/null @@ -1,78 +0,0 @@ -class MetadataError(Exception): - pass - - -class CopyError(RuntimeError): 
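The `*_like` helpers above copy shape, chunks, dtype, compressor, order and filters from an existing array. A minimal sketch (illustrative):

    >>> a = zarr.array(np.arange(100, dtype='i4'), chunks=10)
    >>> b = zarr.zeros_like(a)
    >>> b.shape, b.chunks, str(b.dtype)
    ((100,), (10,), 'int32')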
- pass - - -class _BaseZarrError(ValueError): - _msg = "" - - def __init__(self, *args): - super().__init__(self._msg.format(*args)) - - -class ArrayIndexError(IndexError): - pass - - -class _BaseZarrIndexError(IndexError): - _msg = "" - - def __init__(self, *args): - super().__init__(self._msg.format(*args)) - - -class ContainsGroupError(_BaseZarrError): - _msg = "path {0!r} contains a group" - - -class ContainsArrayError(_BaseZarrError): - _msg = "path {0!r} contains an array" - - -class ArrayNotFoundError(_BaseZarrError): - _msg = "array not found at path %r' {0!r}" - - -class GroupNotFoundError(_BaseZarrError): - _msg = "group not found at path {0!r}" - - -class PathNotFoundError(_BaseZarrError): - _msg = "nothing found at path {0!r}" - - -class BadCompressorError(_BaseZarrError): - _msg = "bad compressor; expected Codec object, found {0!r}" - - -class FSPathExistNotDir(GroupNotFoundError): - _msg = "path exists but is not a directory: %r" - - -class ReadOnlyError(PermissionError): - def __init__(self): - super().__init__("object is read-only") - - -class BoundsCheckError(_BaseZarrIndexError): - _msg = "index out of bounds for dimension with length {0}" - - -class NegativeStepError(IndexError): - def __init__(self): - super().__init__("only slices with step >= 1 are supported") - - -def err_too_many_indices(selection, shape): - raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") - - -class VindexInvalidSelectionError(_BaseZarrIndexError): - _msg = ( - "unsupported selection type for vectorized indexing; only " - "coordinate selection (tuple of integer arrays) and mask selection " - "(single Boolean array) are supported; got {0!r}" - ) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py deleted file mode 100644 index 8894a5ed57..0000000000 --- a/zarr/hierarchy.py +++ /dev/null @@ -1,1609 +0,0 @@ -from collections.abc import MutableMapping -from itertools import islice - -import numpy as np - -from zarr._storage.store import ( - _get_metadata_suffix, - data_root, - meta_root, - DEFAULT_ZARR_VERSION, - assert_zarr_v3_api_available, -) -from zarr.attrs import Attributes -from zarr.core import Array -from zarr.creation import ( - array, - create, - empty, - empty_like, - full, - full_like, - ones, - ones_like, - zeros, - zeros_like, -) -from zarr.errors import ( - ContainsArrayError, - ContainsGroupError, - ArrayNotFoundError, - GroupNotFoundError, - ReadOnlyError, -) -from zarr.storage import ( - _get_hierarchy_metadata, - _prefix_to_group_key, - BaseStore, - MemoryStore, - attrs_key, - contains_array, - contains_group, - group_meta_key, - init_group, - listdir, - normalize_store_arg, - rename, - rmdir, -) -from zarr._storage.v3 import MemoryStoreV3 -from zarr.util import ( - InfoReporter, - TreeViewer, - is_valid_python_name, - nolock, - normalize_shape, - normalize_storage_path, -) - - -class Group(MutableMapping): - """Instantiate a group from an initialized store. - - Parameters - ---------- - store : MutableMapping - Group store, already initialized. - If the Group is used in a context manager, and the store has a ``close`` method, - it will be called on exit. - path : string, optional - Group path. - read_only : bool, optional - True if group should be protected against modification. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. 
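A minimal sketch of how `ReadOnlyError` (a `PermissionError` subclass, defined above) surfaces when writing to a read-only array (illustrative):

    >>> z = zarr.zeros((10,), chunks=(5,), read_only=True)
    >>> try:
    ...     z[:] = 1
    ... except PermissionError as e:
    ...     print(e)
    object is read-only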
- cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - synchronizer : object, optional - Array synchronizer. - - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. versionadded:: 2.13 - - Attributes - ---------- - store - path - name - read_only - chunk_store - synchronizer - attrs - info - meta_array - - Methods - ------- - __len__ - __iter__ - __contains__ - __getitem__ - __enter__ - __exit__ - group_keys - groups - array_keys - arrays - visit - visitkeys - visitvalues - visititems - tree - create_group - require_group - create_groups - require_groups - create_dataset - require_dataset - create - empty - zeros - ones - full - array - empty_like - zeros_like - ones_like - full_like - info - move - - """ - - def __init__( - self, - store, - path=None, - read_only=False, - chunk_store=None, - cache_attrs=True, - synchronizer=None, - zarr_version=None, - *, - meta_array=None, - ): - store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() - - if chunk_store is not None: - chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version) - self._store = store - self._chunk_store = chunk_store - self._path = normalize_storage_path(path) - if self._path: - self._key_prefix = self._path + "/" - else: - self._key_prefix = "" - self._read_only = read_only - self._synchronizer = synchronizer - if meta_array is not None: - self._meta_array = np.empty_like(meta_array, shape=()) - else: - self._meta_array = np.empty(()) - self._version = zarr_version - if self._version == 3: - self._data_key_prefix = data_root + self._key_prefix - self._data_path = data_root + self._path - self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) - self._metadata_key_suffix = _get_metadata_suffix(store=self._store) - - # guard conditions - if contains_array(store, path=self._path): - raise ContainsArrayError(path) - - # initialize metadata - mkey = None - try: - mkey = _prefix_to_group_key(self._store, self._key_prefix) - assert not mkey.endswith("root/.group") - meta_bytes = store[mkey] - except KeyError as e: - if self._version == 2: - raise GroupNotFoundError(path) from e - else: - implicit_prefix = meta_root + self._key_prefix - if self._store.list_prefix(implicit_prefix): - # implicit group does not have any metadata - self._meta = None - else: - raise GroupNotFoundError(path) from e - else: - self._meta = self._store._metadata_class.decode_group_metadata(meta_bytes) - - # setup attributes - if self._version == 2: - akey = self._key_prefix + attrs_key - else: - # Note: mkey doesn't actually exist for implicit groups, but the - # object can still be created. 
- akey = mkey - self._attrs = Attributes( - store, - key=akey, - read_only=read_only, - cache=cache_attrs, - synchronizer=synchronizer, - cached_dict=self._meta["attributes"] if self._version == 3 and self._meta else None, - ) - - # setup info - - @property - def store(self): - """A MutableMapping providing the underlying storage for the group.""" - return self._store - - @property - def path(self): - """Storage path.""" - return self._path - - @property - def name(self): - """Group name following h5py convention.""" - if self._path: - # follow h5py convention: add leading slash - name = self._path - if name[0] != "/": - name = "/" + name - return name - return "/" - - @property - def basename(self): - """Final component of name.""" - return self.name.split("/")[-1] - - @property - def read_only(self): - """A boolean, True if modification operations are not permitted.""" - return self._read_only - - @property - def chunk_store(self): - """A MutableMapping providing the underlying storage for array chunks.""" - if self._chunk_store is None: - return self._store - else: - return self._chunk_store - - @property - def synchronizer(self): - """Object used to synchronize write access to groups and arrays.""" - return self._synchronizer - - @property - def attrs(self): - """A MutableMapping containing user-defined attributes. Note that - attribute values must be JSON serializable.""" - return self._attrs - - @property - def info(self): - """Return diagnostic information about the group.""" - return InfoReporter(self) - - @property - def meta_array(self): - """An array-like instance to use for determining arrays to create and return - to users. - """ - return self._meta_array - - def __eq__(self, other): - return ( - isinstance(other, Group) - and self._store == other.store - and self._read_only == other.read_only - and self._path == other.path - # N.B., no need to compare attributes, should be covered by - # store comparison - ) - - def __iter__(self): - """Return an iterator over group member names. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) - >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) - >>> for name in g1: - ... 
print(name) - bar - baz - foo - quux - - """ - if getattr(self._store, "_store_version", 2) == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_array(self._store, path) or contains_group(self._store, path): - yield key - else: - # TODO: Should this iterate over data folders and/or metadata - # folders and/or metadata files - - dir_path = meta_root + self._key_prefix - name_start = len(dir_path) - keys, prefixes = self._store.list_dir(dir_path) - - # yield any groups or arrays - sfx = self._metadata_key_suffix - for key in keys: - len_suffix = len(".group") + len(sfx) # same for .array - if key.endswith((".group" + sfx, ".array" + sfx)): - yield key[name_start:-len_suffix] - - # also yield any implicit groups - for prefix in prefixes: - prefix = prefix.rstrip("/") - # only implicit if there is no .group.sfx file - if prefix + ".group" + sfx not in self._store: - yield prefix[name_start:] - - # Note: omit data/root/ to avoid duplicate listings - # any group in data/root/ must has an entry in meta/root/ - - def __len__(self): - """Number of members.""" - return sum(1 for _ in self) - - def __repr__(self): - t = type(self) - r = f"<{t.__module__}.{t.__name__}" - if self.name: - r += f" {self.name!r}" - if self._read_only: - r += " read-only" - r += ">" - return r - - def __enter__(self): - """Return the Group for use as a context manager.""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Call the close method of the underlying Store.""" - self.store.close() - - def info_items(self): - def typestr(o): - return f"{type(o).__module__}.{type(o).__name__}" - - items = [] - - # basic info - if self.name is not None: - items += [("Name", self.name)] - items += [ - ("Type", typestr(self)), - ("Read-only", str(self.read_only)), - ] - - # synchronizer - if self._synchronizer is not None: - items += [("Synchronizer type", typestr(self._synchronizer))] - - # storage info - items += [("Store type", typestr(self._store))] - if self._chunk_store is not None: - items += [("Chunk store type", typestr(self._chunk_store))] - - # members - items += [("No. members", len(self))] - array_keys = sorted(self.array_keys()) - group_keys = sorted(self.group_keys()) - items += [("No. arrays", len(array_keys))] - items += [("No. groups", len(group_keys))] - if array_keys: - items += [("Arrays", ", ".join(array_keys))] - if group_keys: - items += [("Groups", ", ".join(group_keys))] - - return items - - def __getstate__(self): - return { - "store": self._store, - "path": self._path, - "read_only": self._read_only, - "chunk_store": self._chunk_store, - "cache_attrs": self._attrs.cache, - "synchronizer": self._synchronizer, - "zarr_version": self._version, - "meta_array": self._meta_array, - } - - def __setstate__(self, state): - self.__init__(**state) - - def _item_path(self, item): - absolute = isinstance(item, str) and item and item[0] == "/" - path = normalize_storage_path(item) - if not absolute and self._path: - path = self._key_prefix + path - return path - - def __contains__(self, item): - """Test for group membership. 
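The pickling and context-manager hooks above (``__getstate__``/``__setstate__``, ``__enter__``/``__exit__``) have no doctests of their own. An illustrative sketch, assuming zarr 2.x with the default in-memory store:

    import pickle
    import zarr

    g = zarr.group()
    g.create_group('foo')
    g2 = pickle.loads(pickle.dumps(g))   # round-trips via __getstate__/__setstate__
    assert sorted(g2) == ['foo']

    with zarr.group() as h:              # __exit__ calls the store's close() method
        h.create_group('bar')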
- - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> d1 = g1.create_dataset('bar', shape=100, chunks=10) - >>> 'foo' in g1 - True - >>> 'bar' in g1 - True - >>> 'baz' in g1 - False - - """ - path = self._item_path(item) - return contains_array(self._store, path) or contains_group( - self._store, path, explicit_only=False - ) - - def __getitem__(self, item): - """Obtain a group member. - - Parameters - ---------- - item : string - Member name or path. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> d1 = g1.create_dataset('foo/bar/baz', shape=100, chunks=10) - >>> g1['foo'] - - >>> g1['foo/bar'] - - >>> g1['foo/bar/baz'] - - - """ - path = self._item_path(item) - try: - return Array( - self._store, - read_only=self._read_only, - path=path, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer, - cache_attrs=self.attrs.cache, - zarr_version=self._version, - meta_array=self._meta_array, - ) - except ArrayNotFoundError: - pass - - try: - return Group( - self._store, - read_only=self._read_only, - path=path, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - meta_array=self._meta_array, - ) - except GroupNotFoundError: - pass - - if self._version == 3: - implicit_group = meta_root + path + "/" - # non-empty folder in the metadata path implies an implicit group - if self._store.list_prefix(implicit_group): - return Group( - self._store, - read_only=self._read_only, - path=path, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - meta_array=self._meta_array, - ) - else: - raise KeyError(item) - else: - raise KeyError(item) - - def __setitem__(self, item, value): - self.array(item, value, overwrite=True) - - def __delitem__(self, item): - return self._write_op(self._delitem_nosync, item) - - def _delitem_nosync(self, item): - path = self._item_path(item) - if contains_array(self._store, path) or contains_group( - self._store, path, explicit_only=False - ): - rmdir(self._store, path) - else: - raise KeyError(item) - - def __getattr__(self, item): - # https://github.com/jupyter/notebook/issues/2014 - # Save a possibly expensive lookup (for e.g. against cloud stores) - # Note: The _ipython_display_ method is required to display the right info as a side-effect. - # It is simpler to pretend it doesn't exist. - if item in ["_ipython_canary_method_should_not_exist_", "_ipython_display_"]: - raise AttributeError - - # allow access to group members via dot notation - try: - return self.__getitem__(item) - except KeyError as e: - raise AttributeError from e - - def __dir__(self): - # noinspection PyUnresolvedReferences - base = super().__dir__() - keys = sorted(set(base + list(self))) - keys = [k for k in keys if is_valid_python_name(k)] - return keys - - def _ipython_key_completions_(self): - return sorted(self) - - def group_keys(self): - """Return an iterator over member names for groups only. 
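Two member-access paths above are not covered by the doctests: attribute-style access through ``__getattr__`` and removal through ``__delitem__``. A short sketch, assuming the zarr 2.x API removed in this diff:

    import zarr

    g = zarr.group()
    g.create_dataset('foo/bar', shape=(10,), chunks=(5,))
    g.foo            # dot notation resolves via __getattr__, same object as g['foo']
    'foo/bar' in g   # True, via __contains__
    del g['foo']     # __delitem__ removes the whole subtree from the store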
- - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) - >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) - >>> sorted(g1.group_keys()) - ['bar', 'foo'] - - """ - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path): - yield key - else: - dir_name = meta_root + self._path - group_sfx = ".group" + self._metadata_key_suffix - # The fact that we call sorted means this can't be a streaming generator. - # The keys are already in memory. - all_keys = sorted(listdir(self._store, dir_name)) - for key in all_keys: - if key.endswith(group_sfx): - key = key[: -len(group_sfx)] - if key in all_keys: - # otherwise we will double count this group - continue - path = self._key_prefix + key - if path.endswith(".array" + self._metadata_key_suffix): - # skip array keys - continue - if contains_group(self._store, path, explicit_only=False): - yield key - - def groups(self): - """Return an iterator over (name, value) pairs for groups only. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) - >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) - >>> for n, v in g1.groups(): - ... print(n, type(v)) - bar - foo - - """ - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path, explicit_only=False): - yield key, Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - ) - - else: - for key in self.group_keys(): - path = self._key_prefix + key - yield key, Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - ) - - def array_keys(self, recurse=False): - """Return an iterator over member names for arrays only. - - Parameters - ---------- - recurse : recurse, optional - Option to return member names for all arrays, even from groups - below the current one. If False, only member names for arrays in - the current group will be returned. Default value is False. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) - >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) - >>> sorted(g1.array_keys()) - ['baz', 'quux'] - - """ - return self._array_iter(keys_only=True, method="array_keys", recurse=recurse) - - def arrays(self, recurse=False): - """Return an iterator over (name, value) pairs for arrays only. - - Parameters - ---------- - recurse : recurse, optional - Option to return (name, value) pairs for all arrays, even from groups - below the current one. If False, only (name, value) pairs for arrays in - the current group will be returned. Default value is False. 
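The ``recurse`` flag on ``array_keys``/``arrays`` above is not exercised by the doctests. A hedged sketch of the difference, assuming zarr 2.x:

    import zarr

    root = zarr.group()
    root.create_dataset('a', shape=(5,), chunks=(5,))
    root.create_dataset('grp/b', shape=(5,), chunks=(5,))
    sorted(root.array_keys())               # ['a']       -- current group only
    sorted(root.array_keys(recurse=True))   # ['a', 'b']  -- descends into 'grp' as well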
- - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) - >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) - >>> for n, v in g1.arrays(): - ... print(n, type(v)) - baz - quux - - """ - return self._array_iter(keys_only=False, method="arrays", recurse=recurse) - - def _array_iter(self, keys_only, method, recurse): - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_array(self._store, path): - _key = key.rstrip("/") - yield _key if keys_only else (_key, self[key]) - elif recurse and contains_group(self._store, path): - group = self[key] - yield from getattr(group, method)(recurse=recurse) - else: - dir_name = meta_root + self._path - array_sfx = ".array" + self._metadata_key_suffix - group_sfx = ".group" + self._metadata_key_suffix - - for key in sorted(listdir(self._store, dir_name)): - if key.endswith(array_sfx): - key = key[: -len(array_sfx)] - _key = key.rstrip("/") - yield _key if keys_only else (_key, self[key]) - - path = self._key_prefix + key - assert not path.startswith("meta/") - if key.endswith(group_sfx): - # skip group metadata keys - continue - elif recurse and contains_group(self._store, path): - group = self[key] - yield from getattr(group, method)(recurse=recurse) - - def visitvalues(self, func): - """Run ``func`` on each object. - - Note: If ``func`` returns ``None`` (or doesn't return), - iteration continues. However, if ``func`` returns - anything else, it ceases and returns that value. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g3.create_group('baz') - >>> g5 = g3.create_group('quux') - >>> def print_visitor(obj): - ... print(obj) - >>> g1.visitvalues(print_visitor) - - - - - >>> g3.visitvalues(print_visitor) - - - - """ - - def _visit(obj): - yield obj - keys = sorted(getattr(obj, "keys", lambda: [])()) - for k in keys: - yield from _visit(obj[k]) - - for each_obj in islice(_visit(self), 1, None): - value = func(each_obj) - if value is not None: - return value - - def visit(self, func): - """Run ``func`` on each object's path. - - Note: If ``func`` returns ``None`` (or doesn't return), - iteration continues. However, if ``func`` returns - anything else, it ceases and returns that value. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g3.create_group('baz') - >>> g5 = g3.create_group('quux') - >>> def print_visitor(name): - ... print(name) - >>> g1.visit(print_visitor) - bar - bar/baz - bar/quux - foo - >>> g3.visit(print_visitor) - baz - quux - - Search for members matching some name query can be implemented using - ``visit`` that is, ``find`` and ``findall``. Consider the following - tree:: - - / - ├── aaa - │ └── bbb - │ └── ccc - │ └── aaa - ├── bar - └── foo - - It is created as follows: - - >>> root = zarr.group() - >>> foo = root.create_group("foo") - >>> bar = root.create_group("bar") - >>> root.create_group("aaa").create_group("bbb").create_group("ccc").create_group("aaa") - - - For ``find``, the first path that matches a given pattern (for example - "aaa") is returned. Note that a non-None value is returned in the visit - function to stop further iteration. 
- - >>> import re - >>> pattern = re.compile("aaa") - >>> found = None - >>> def find(path): - ... global found - ... if pattern.search(path) is not None: - ... found = path - ... return True - ... - >>> root.visit(find) - True - >>> print(found) - aaa - - For ``findall``, all the results are gathered into a list - - >>> pattern = re.compile("aaa") - >>> found = [] - >>> def findall(path): - ... if pattern.search(path) is not None: - ... found.append(path) - ... - >>> root.visit(findall) - >>> print(found) - ['aaa', 'aaa/bbb', 'aaa/bbb/ccc', 'aaa/bbb/ccc/aaa'] - - To match only on the last part of the path, use a greedy regex to filter - out the prefix: - - >>> prefix_pattern = re.compile(r".*/") - >>> pattern = re.compile("aaa") - >>> found = [] - >>> def findall(path): - ... match = prefix_pattern.match(path) - ... if match is None: - ... name = path - ... else: - ... _, end = match.span() - ... name = path[end:] - ... if pattern.search(name) is not None: - ... found.append(path) - ... return None - ... - >>> root.visit(findall) - >>> print(found) - ['aaa', 'aaa/bbb/ccc/aaa'] - """ - - base_len = len(self.name) - return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"))) - - def visitkeys(self, func): - """An alias for :py:meth:`~Group.visit`.""" - - return self.visit(func) - - def visititems(self, func): - """Run ``func`` on each object's path and the object itself. - - Note: If ``func`` returns ``None`` (or doesn't return), - iteration continues. However, if ``func`` returns - anything else, it ceases and returns that value. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g3.create_group('baz') - >>> g5 = g3.create_group('quux') - >>> def print_visitor(name, obj): - ... print((name, obj)) - >>> g1.visititems(print_visitor) - ('bar', ) - ('bar/baz', ) - ('bar/quux', ) - ('foo', ) - >>> g3.visititems(print_visitor) - ('baz', ) - ('quux', ) - - """ - - base_len = len(self.name) - return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"), o)) - - def tree(self, expand=False, level=None): - """Provide a ``print``-able display of the hierarchy. - - Parameters - ---------- - expand : bool, optional - Only relevant for HTML representation. If True, tree will be fully expanded. - level : int, optional - Maximum depth to descend into hierarchy. - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g3.create_group('baz') - >>> g5 = g3.create_group('quux') - >>> d1 = g5.create_dataset('baz', shape=100, chunks=10) - >>> g1.tree() - / - ├── bar - │ ├── baz - │ └── quux - │ └── baz (100,) float64 - └── foo - >>> g1.tree(level=2) - / - ├── bar - │ ├── baz - │ └── quux - └── foo - >>> g3.tree() - bar - ├── baz - └── quux - └── baz (100,) float64 - - Notes - ----- - Please note that this is an experimental feature. The behaviour of this - function is still evolving and the default output and/or parameters may change - in future versions. - - """ - - return TreeViewer(self, expand=expand, level=level) - - def _write_op(self, f, *args, **kwargs): - # guard condition - if self._read_only: - raise ReadOnlyError() - - if self._synchronizer is None: - # no synchronization - lock = nolock - else: - # synchronize on the root group - lock = self._synchronizer[group_meta_key] - - with lock: - return f(*args, **kwargs) - - def create_group(self, name, overwrite=False): - """Create a sub-group. 
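Every mutating method below funnels through ``_write_op`` above, which enforces the read-only flag and takes a per-hierarchy lock when a synchronizer is configured. A minimal sketch of that path, assuming zarr 2.x and its ``ThreadSynchronizer`` (not shown in this diff):

    import zarr

    root = zarr.group(synchronizer=zarr.ThreadSynchronizer())
    # create_group, create_dataset, move, ... all call _write_op, which acquires
    # synchronizer[group_meta_key] before touching the store.
    root.create_group('locked')

    ro = zarr.open_group(store=root.store, mode='r')
    # ro.create_group('x') would raise ReadOnlyError via the _write_op guard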
- - Parameters - ---------- - name : string - Group name. - overwrite : bool, optional - If True, overwrite any existing array with the given name. - - Returns - ------- - g : zarr.hierarchy.Group - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.create_group('foo') - >>> g3 = g1.create_group('bar') - >>> g4 = g1.create_group('baz/quux') - - """ - - return self._write_op(self._create_group_nosync, name, overwrite=overwrite) - - def _create_group_nosync(self, name, overwrite=False): - path = self._item_path(name) - - # create terminal group - init_group(self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite) - - return Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - ) - - def create_groups(self, *names, **kwargs): - """Convenience method to create multiple groups in a single call.""" - return tuple(self.create_group(name, **kwargs) for name in names) - - def require_group(self, name, overwrite=False): - """Obtain a sub-group, creating one if it doesn't exist. - - Parameters - ---------- - name : string - Group name. - overwrite : bool, optional - Overwrite any existing array with given `name` if present. - - Returns - ------- - g : zarr.hierarchy.Group - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> g2 = g1.require_group('foo') - >>> g3 = g1.require_group('foo') - >>> g2 == g3 - True - - """ - - return self._write_op(self._require_group_nosync, name, overwrite=overwrite) - - def _require_group_nosync(self, name, overwrite=False): - path = self._item_path(name) - - # create terminal group if necessary - if not contains_group(self._store, path): - init_group( - store=self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite - ) - - return Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - ) - - def require_groups(self, *names): - """Convenience method to require multiple groups in a single call.""" - return tuple(self.require_group(name) for name in names) - - # noinspection PyIncorrectDocstring - def create_dataset(self, name, **kwargs): - """Create an array. - - Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the require_dataset() method. - - Parameters - ---------- - name : string - Array name. - data : array-like, optional - Initial data. - shape : int or tuple of ints - Array shape. - chunks : int or tuple of ints, optional - Chunk shape. If not provided, will be guessed from `shape` and - `dtype`. - dtype : string or dtype, optional - NumPy dtype. - compressor : Codec, optional - Primary compressor. - fill_value : object - Default value to use for uninitialized portions of the array. - order : {'C', 'F'}, optional - Memory layout to be used within each chunk. - synchronizer : zarr.sync.ArraySynchronizer, optional - Array synchronizer. - filters : sequence of Codecs, optional - Sequence of filters to use to encode chunk data prior to - compression. - overwrite : bool, optional - If True, replace any existing array or group with the given name. - cache_metadata : bool, optional - If True, array configuration metadata will be cached for the - lifetime of the object. 
If False, array metadata will be reloaded - prior to all data access and modification operations (may incur - overhead depending on storage and data access pattern). - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - Returns - ------- - a : zarr.core.Array - - Examples - -------- - >>> import zarr - >>> g1 = zarr.group() - >>> d1 = g1.create_dataset('foo', shape=(10000, 10000), - ... chunks=(1000, 1000)) - >>> d1 - - >>> d2 = g1.create_dataset('bar/baz/qux', shape=(100, 100, 100), - ... chunks=(100, 10, 10)) - >>> d2 - - - """ - assert "mode" not in kwargs - - return self._write_op(self._create_dataset_nosync, name, **kwargs) - - def _create_dataset_nosync(self, name, data=None, **kwargs): - assert "mode" not in kwargs - path = self._item_path(name) - - # determine synchronizer - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - - # create array - if data is None: - a = create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - else: - a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - return a - - def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): - """Obtain an array, creating if it doesn't exist. - - Arrays are known as "datasets" in HDF5 terminology. For compatibility - with h5py, Zarr groups also implement the create_dataset() method. - - Other `kwargs` are as per :func:`zarr.hierarchy.Group.create_dataset`. - - Parameters - ---------- - name : string - Array name. - shape : int or tuple of ints - Array shape. - dtype : string or dtype, optional - NumPy dtype. - exact : bool, optional - If True, require `dtype` to match exactly. If false, require - `dtype` can be cast from array dtype. - - """ - - return self._write_op( - self._require_dataset_nosync, name, shape=shape, dtype=dtype, exact=exact, **kwargs - ) - - def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): - path = self._item_path(name) - - if contains_array(self._store, path): - # array already exists at path, validate that it is the right shape and type - - synchronizer = kwargs.get("synchronizer", self._synchronizer) - cache_metadata = kwargs.get("cache_metadata", True) - cache_attrs = kwargs.get("cache_attrs", self.attrs.cache) - a = Array( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=synchronizer, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - meta_array=self._meta_array, - ) - shape = normalize_shape(shape) - if shape != a.shape: - raise TypeError( - f"shape do not match existing array; expected {a.shape}, got {shape}" - ) - dtype = np.dtype(dtype) - if exact: - if dtype != a.dtype: - raise TypeError(f"dtypes do not match exactly; expected {a.dtype}, got {dtype}") - else: - if not np.can_cast(dtype, a.dtype): - raise TypeError(f"dtypes ({dtype}, {a.dtype}) cannot be safely cast") - return a - - else: - return self._create_dataset_nosync(name, shape=shape, dtype=dtype, **kwargs) - - def create(self, name, **kwargs): - """Create an array. 
Keyword arguments as per - :func:`zarr.creation.create`.""" - return self._write_op(self._create_nosync, name, **kwargs) - - def _create_nosync(self, name, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - def empty(self, name, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.empty`.""" - return self._write_op(self._empty_nosync, name, **kwargs) - - def _empty_nosync(self, name, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return empty(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - def zeros(self, name, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.zeros`.""" - return self._write_op(self._zeros_nosync, name, **kwargs) - - def _zeros_nosync(self, name, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return zeros(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - def ones(self, name, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.ones`.""" - return self._write_op(self._ones_nosync, name, **kwargs) - - def _ones_nosync(self, name, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return ones(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - def full(self, name, fill_value, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.full`.""" - return self._write_op(self._full_nosync, name, fill_value, **kwargs) - - def _full_nosync(self, name, fill_value, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return full( - store=self._store, - path=path, - chunk_store=self._chunk_store, - fill_value=fill_value, - **kwargs, - ) - - def array(self, name, data, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.array`.""" - return self._write_op(self._array_nosync, name, data, **kwargs) - - def _array_nosync(self, name, data, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) - - def empty_like(self, name, data, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.empty_like`.""" - return self._write_op(self._empty_like_nosync, name, data, **kwargs) - - def _empty_like_nosync(self, name, data, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return empty_like( - data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs - ) - - def zeros_like(self, name, data, **kwargs): - """Create an array. 
Keyword arguments as per - :func:`zarr.creation.zeros_like`.""" - return self._write_op(self._zeros_like_nosync, name, data, **kwargs) - - def _zeros_like_nosync(self, name, data, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return zeros_like( - data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs - ) - - def ones_like(self, name, data, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.ones_like`.""" - return self._write_op(self._ones_like_nosync, name, data, **kwargs) - - def _ones_like_nosync(self, name, data, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return ones_like( - data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs - ) - - def full_like(self, name, data, **kwargs): - """Create an array. Keyword arguments as per - :func:`zarr.creation.full_like`.""" - return self._write_op(self._full_like_nosync, name, data, **kwargs) - - def _full_like_nosync(self, name, data, **kwargs): - path = self._item_path(name) - kwargs.setdefault("synchronizer", self._synchronizer) - kwargs.setdefault("cache_attrs", self.attrs.cache) - return full_like( - data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs - ) - - def _move_nosync(self, path, new_path): - rename(self._store, path, new_path) - if self._chunk_store is not None: - rename(self._chunk_store, path, new_path) - - def move(self, source, dest): - """Move contents from one path to another relative to the Group. - - Parameters - ---------- - source : string - Name or path to a Zarr object to move. - dest : string - New name or path of the Zarr object. - """ - - source = self._item_path(source) - dest = self._item_path(dest) - - # Check that source exists. - if not ( - contains_array(self._store, source) - or contains_group(self._store, source, explicit_only=False) - ): - raise ValueError('The source, "%s", does not exist.' % source) - if contains_array(self._store, dest) or contains_group( - self._store, dest, explicit_only=False - ): - raise ValueError('The dest, "%s", already exists.' % dest) - - # Ensure groups needed for `dest` exist. - if "/" in dest: - self.require_group("/" + dest.rsplit("/", 1)[0]) - - self._write_op(self._move_nosync, source, dest) - - # Override ipython repr methods, GH1716 - # https://ipython.readthedocs.io/en/stable/config/integrating.html#custom-methods - # " If the methods don’t exist, the standard repr() is used. If a method exists and - # returns None, it is treated the same as if it does not exist." 
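``move`` above has no usage example; a short sketch of its rename semantics, assuming zarr 2.x:

    import zarr

    root = zarr.group()
    root.create_dataset('foo/bar', shape=(10,), chunks=(5,))
    root.move('foo/bar', 'baz/bar')       # renames keys in the store (and chunk_store, if separate)
    'foo/bar' in root, 'baz/bar' in root  # (False, True)
    # Moving onto an existing path, or from a missing source, raises ValueError before any rename.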
- def _repr_html_(self): - return None - - def _repr_latex_(self): - return None - - def _repr_mimebundle_(self, **kwargs): - return None - - def _repr_svg_(self): - return None - - def _repr_png_(self): - return None - - def _repr_jpeg_(self): - return None - - def _repr_markdown_(self): - return None - - def _repr_javascript_(self): - return None - - def _repr_pdf_(self): - return None - - def _repr_json_(self): - return None - - -def _normalize_store_arg(store, *, storage_options=None, mode="r", zarr_version=None): - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() - - if store is None: - return MemoryStore() if zarr_version == 2 else MemoryStoreV3() - return normalize_store_arg( - store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - - -def group( - store=None, - overwrite=False, - chunk_store=None, - cache_attrs=True, - synchronizer=None, - path=None, - *, - zarr_version=None, - meta_array=None, -): - """Create a group. - - Parameters - ---------- - store : MutableMapping or string, optional - Store or path to directory in file system. - overwrite : bool, optional - If True, delete any pre-existing data in `store` at `path` before - creating the group. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - synchronizer : object, optional - Array synchronizer. - path : string, optional - Group path within store. - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. versionadded:: 2.16.1 - - Returns - ------- - g : zarr.hierarchy.Group - - Examples - -------- - Create a group in memory:: - - >>> import zarr - >>> g = zarr.group() - >>> g - - - Create a group with a different store:: - - >>> store = zarr.DirectoryStore('data/example.zarr') - >>> g = zarr.group(store=store, overwrite=True) - >>> g - - - """ - - # handle polymorphic store arg - store = _normalize_store_arg(store, zarr_version=zarr_version, mode="w") - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() - - path = normalize_storage_path(path) - - requires_init = None - if zarr_version == 2: - requires_init = overwrite or not contains_group(store) - elif zarr_version == 3: - requires_init = overwrite or not contains_group(store, path) - - if requires_init: - init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) - - return Group( - store, - read_only=False, - chunk_store=chunk_store, - cache_attrs=cache_attrs, - synchronizer=synchronizer, - path=path, - zarr_version=zarr_version, - meta_array=meta_array, - ) - - -def open_group( - store=None, - mode="a", - cache_attrs=True, - synchronizer=None, - path=None, - chunk_store=None, - storage_options=None, - *, - zarr_version=None, - meta_array=None, -): - """Open a group using file-mode-like semantics. - - Parameters - ---------- - store : MutableMapping or string, optional - Store or path to directory in file system or name of zip file. 
- mode : {'r', 'r+', 'a', 'w', 'w-'}, optional - Persistence mode: 'r' means read only (must exist); 'r+' means - read/write (must exist); 'a' means read/write (create if doesn't - exist); 'w' means create (overwrite if exists); 'w-' means create - (fail if exists). - cache_attrs : bool, optional - If True (default), user attributes will be cached for attribute read - operations. If False, user attributes are reloaded from the store prior - to all attribute read operations. - synchronizer : object, optional - Array synchronizer. - path : string, optional - Group path within store. - chunk_store : MutableMapping or string, optional - Store or path to directory in file system or name of zip file. - storage_options : dict - If using an fsspec URL to create the store, these will be passed to - the backend implementation. Ignored otherwise. - meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. - - .. versionadded:: 2.13 - - Returns - ------- - g : zarr.hierarchy.Group - - Examples - -------- - >>> import zarr - >>> root = zarr.open_group('data/example.zarr', mode='w') - >>> foo = root.create_group('foo') - >>> bar = root.create_group('bar') - >>> root - - >>> root2 = zarr.open_group('data/example.zarr', mode='a') - >>> root2 - - >>> root == root2 - True - - """ - - # handle polymorphic store arg - store = _normalize_store_arg( - store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() - - if chunk_store is not None: - chunk_store = _normalize_store_arg( - chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - if getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) != zarr_version: - raise ValueError("zarr_version of store and chunk_store must match") # pragma: no cover - - path = normalize_storage_path(path) - - # ensure store is initialized - - if mode in ["r", "r+"]: - if not contains_group(store, path=path): - if contains_array(store, path=path): - raise ContainsArrayError(path) - raise GroupNotFoundError(path) - - elif mode == "w": - init_group(store, overwrite=True, path=path, chunk_store=chunk_store) - - elif mode == "a": - if not contains_group(store, path=path): - if contains_array(store, path=path): - raise ContainsArrayError(path) - init_group(store, path=path, chunk_store=chunk_store) - - elif mode in ["w-", "x"]: - if contains_array(store, path=path): - raise ContainsArrayError(path) - elif contains_group(store, path=path): - raise ContainsGroupError(path) - else: - init_group(store, path=path, chunk_store=chunk_store) - - # determine read only status - read_only = mode == "r" - - return Group( - store, - read_only=read_only, - cache_attrs=cache_attrs, - synchronizer=synchronizer, - path=path, - chunk_store=chunk_store, - zarr_version=zarr_version, - meta_array=meta_array, - ) diff --git a/zarr/indexing.py b/zarr/indexing.py deleted file mode 100644 index 35c1e813b1..0000000000 --- a/zarr/indexing.py +++ /dev/null @@ -1,1079 +0,0 @@ -import collections -import itertools -import math -import numbers - -import numpy as np - - -from zarr.errors import ( - ArrayIndexError, - NegativeStepError, - err_too_many_indices, - VindexInvalidSelectionError, - BoundsCheckError, -) - - -def is_integer(x): - """True if x is an integer (both pure Python or NumPy). 
- - Note that Python's bool is considered an integer too. - """ - return isinstance(x, numbers.Integral) - - -def is_integer_list(x): - """True if x is a list of integers. - - This function assumes ie *does not check* that all elements of the list - have the same type. Mixed type lists will result in other errors that will - bubble up anyway. - """ - return isinstance(x, list) and len(x) > 0 and is_integer(x[0]) - - -def is_integer_array(x, ndim=None): - t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" - if ndim is not None: - t = t and len(x.shape) == ndim - return t - - -def is_bool_array(x, ndim=None): - t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool - if ndim is not None: - t = t and len(x.shape) == ndim - return t - - -def is_scalar(value, dtype): - if np.isscalar(value): - return True - if isinstance(value, tuple) and dtype.names and len(value) == len(dtype.names): - return True - if dtype.kind == "O" and not isinstance(value, np.ndarray): - return True - return False - - -def is_pure_fancy_indexing(selection, ndim): - """Check whether a selection contains only scalars or integer array-likes. - - Parameters - ---------- - selection : tuple, slice, or scalar - A valid selection value for indexing into arrays. - - Returns - ------- - is_pure : bool - True if the selection is a pure fancy indexing expression (ie not mixed - with boolean or slices). - """ - if ndim == 1: - if is_integer_list(selection) or is_integer_array(selection): - return True - # if not, we go through the normal path below, because a 1-tuple - # of integers is also allowed. - no_slicing = ( - isinstance(selection, tuple) - and len(selection) == ndim - and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) - ) - return ( - no_slicing - and all( - is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) - for elem in selection - ) - and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) - ) - - -def is_pure_orthogonal_indexing(selection, ndim): - if not ndim: - return False - - # Case 1: Selection is a single iterable of integers - if is_integer_list(selection) or is_integer_array(selection, ndim=1): - return True - - # Case two: selection contains either zero or one integer iterables. - # All other selection elements are slices or integers - return ( - isinstance(selection, tuple) - and len(selection) == ndim - and sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 - and all( - is_integer_list(elem) or is_integer_array(elem) or isinstance(elem, (int, slice)) - for elem in selection - ) - ) - - -def normalize_integer_selection(dim_sel, dim_len): - # normalize type to int - dim_sel = int(dim_sel) - - # handle wraparound - if dim_sel < 0: - dim_sel = dim_len + dim_sel - - # handle out of bounds - if dim_sel >= dim_len or dim_sel < 0: - raise BoundsCheckError(dim_len) - - return dim_sel - - -ChunkDimProjection = collections.namedtuple( - "ChunkDimProjection", ("dim_chunk_ix", "dim_chunk_sel", "dim_out_sel") -) -"""A mapping from chunk to output array for a single dimension. - -Parameters ----------- -dim_chunk_ix - Index of chunk. -dim_chunk_sel - Selection of items from chunk array. -dim_out_sel - Selection of items in target (output) array. 
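The predicates above decide which indexer an array routes a selection to; the distinction between a "pure fancy" and a "pure orthogonal" selection is easiest to see on small inputs. An illustrative sketch, assuming zarr 2.x where these helpers live in ``zarr.indexing``:

    import numpy as np
    from zarr.indexing import is_pure_fancy_indexing, is_pure_orthogonal_indexing

    coords = (np.array([0, 2, 4]), np.array([1, 1, 3]))
    is_pure_fancy_indexing(coords, 2)                                # True: integer arrays only
    is_pure_fancy_indexing((slice(0, 5), np.array([1, 2])), 2)       # False: a slice is mixed in
    is_pure_orthogonal_indexing((np.array([0, 2]), slice(None)), 2)  # True: at most one integer array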
- -""" - - -class IntDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize - dim_sel = normalize_integer_selection(dim_sel, dim_len) - - # store attributes - self.dim_sel = dim_sel - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nitems = 1 - - def __iter__(self): - dim_chunk_ix = self.dim_sel // self.dim_chunk_len - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel - dim_offset - dim_out_sel = None - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -def ceildiv(a, b): - return math.ceil(a / b) - - -class SliceDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize - self.start, self.stop, self.step = dim_sel.indices(dim_len) - if self.step < 1: - raise NegativeStepError() - - # store attributes - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nitems = max(0, ceildiv((self.stop - self.start), self.step)) - self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) - - def __iter__(self): - # figure out the range of chunks we need to visit - dim_chunk_ix_from = self.start // self.dim_chunk_len - dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) - - # iterate over chunks in range - for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): - # compute offsets for chunk within overall array - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) - - # determine chunk length, accounting for trailing chunk - dim_chunk_len = dim_limit - dim_offset - - if self.start < dim_offset: - # selection starts before current chunk - dim_chunk_sel_start = 0 - remainder = (dim_offset - self.start) % self.step - if remainder: - dim_chunk_sel_start += self.step - remainder - # compute number of previous items, provides offset into output array - dim_out_offset = ceildiv((dim_offset - self.start), self.step) - - else: - # selection starts within current chunk - dim_chunk_sel_start = self.start - dim_offset - dim_out_offset = 0 - - if self.stop > dim_limit: - # selection ends after current chunk - dim_chunk_sel_stop = dim_chunk_len - - else: - # selection ends within current chunk - dim_chunk_sel_stop = self.stop - dim_offset - - dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) - dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) - - # If there are no elements on the selection within this chunk, then skip - if dim_chunk_nitems == 0: - continue - - dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) - - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -def check_selection_length(selection, shape): - if len(selection) > len(shape): - err_too_many_indices(selection, shape) - - -def replace_ellipsis(selection, shape): - selection = ensure_tuple(selection) - - # count number of ellipsis present - n_ellipsis = sum(1 for i in selection if i is Ellipsis) - - if n_ellipsis > 1: - # more than 1 is an error - raise IndexError("an index can only have a single ellipsis ('...')") - - elif n_ellipsis == 1: - # locate the ellipsis, count how many items to left and right - n_items_l = selection.index(Ellipsis) # items to left of ellipsis - n_items_r = len(selection) - (n_items_l + 1) # items to right of ellipsis - n_items = len(selection) - 1 # all non-ellipsis items - - if n_items >= len(shape): - # ellipsis does nothing, just remove it - selection = tuple(i for i in selection if i != Ellipsis) - - else: - # replace 
ellipsis with as many slices are needed for number of dims - new_item = selection[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) - if n_items_r: - new_item += selection[-n_items_r:] - selection = new_item - - # fill out selection if not completely specified - if len(selection) < len(shape): - selection += (slice(None),) * (len(shape) - len(selection)) - - # check selection not too long - check_selection_length(selection, shape) - - return selection - - -def replace_lists(selection): - return tuple( - np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection - ) - - -def ensure_tuple(v): - if not isinstance(v, tuple): - v = (v,) - return v - - -ChunkProjection = collections.namedtuple( - "ChunkProjection", ("chunk_coords", "chunk_selection", "out_selection") -) -"""A mapping of items from chunk to output array. Can be used to extract items from the -chunk array for loading into an output array. Can also be used to extract items from a -value array for setting/updating in a chunk array. - -Parameters ----------- -chunk_coords - Indices of chunk. -chunk_selection - Selection of items from chunk array. -out_selection - Selection of items in target (output) array. - -""" - - -def is_slice(s): - return isinstance(s, slice) - - -def is_contiguous_slice(s): - return is_slice(s) and (s.step is None or s.step == 1) - - -def is_positive_slice(s): - return is_slice(s) and (s.step is None or s.step >= 1) - - -def is_contiguous_selection(selection): - selection = ensure_tuple(selection) - return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) - - -def is_basic_selection(selection): - selection = ensure_tuple(selection) - return all(is_integer(s) or is_positive_slice(s) for s in selection) - - -# noinspection PyProtectedMember -class BasicIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_slice(dim_sel): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) - - else: - raise IndexError( - f"unsupported selection item for basic indexing; " - f"expected integer or slice, got {type(dim_sel)!r}" - ) - - dim_indexers.append(dim_indexer) - - self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) - self.drop_axes = None - - def __iter__(self): - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None - ) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class BoolArrayDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # check number of dimensions - if not is_bool_array(dim_sel, 1): - raise IndexError( - "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" - ) - - # check shape - if dim_sel.shape[0] != dim_len: - raise IndexError( - f"Boolean array has the wrong length for dimension; " - f"expected {dim_len}, got { dim_sel.shape[0]}" - ) - - # store attributes - self.dim_sel = dim_sel - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - 
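``replace_ellipsis`` above normalises a user selection into one slice or integer per dimension before ``BasicIndexer`` walks the chunk grid. A quick sketch of what it returns, assuming zarr 2.x:

    from zarr.indexing import replace_ellipsis

    replace_ellipsis((Ellipsis, 0), (5, 10, 15))
    # -> (slice(None, None, None), slice(None, None, None), 0)
    replace_ellipsis(0, (5, 10))
    # -> (0, slice(None, None, None))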
self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) - - # precompute number of selected items for each chunk - self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") - for dim_chunk_ix in range(self.nchunks): - dim_offset = dim_chunk_ix * self.dim_chunk_len - self.chunk_nitems[dim_chunk_ix] = np.count_nonzero( - self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] - ) - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - self.nitems = self.chunk_nitems_cumsum[-1] - self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] - - def __iter__(self): - # iterate over chunks with at least one item - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in chunk - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] - - # pad out if final chunk - if dim_chunk_sel.shape[0] < self.dim_chunk_len: - tmp = np.zeros(self.dim_chunk_len, dtype=bool) - tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel - dim_chunk_sel = tmp - - # find region in output - if dim_chunk_ix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] - stop = self.chunk_nitems_cumsum[dim_chunk_ix] - dim_out_sel = slice(start, stop) - - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -class Order: - UNKNOWN = 0 - INCREASING = 1 - DECREASING = 2 - UNORDERED = 3 - - @staticmethod - def check(a): - diff = np.diff(a) - diff_positive = diff >= 0 - n_diff_positive = np.count_nonzero(diff_positive) - all_increasing = n_diff_positive == len(diff_positive) - any_increasing = n_diff_positive > 0 - if all_increasing: - order = Order.INCREASING - elif any_increasing: - order = Order.UNORDERED - else: - order = Order.DECREASING - return order - - -def wraparound_indices(x, dim_len): - loc_neg = x < 0 - if np.any(loc_neg): - x[loc_neg] = x[loc_neg] + dim_len - - -def boundscheck_indices(x, dim_len): - if np.any(x < 0) or np.any(x >= dim_len): - raise BoundsCheckError(dim_len) - - -class IntArrayDimIndexer: - """Integer array selection against a single dimension.""" - - def __init__( - self, - dim_sel, - dim_len, - dim_chunk_len, - wraparound=True, - boundscheck=True, - order=Order.UNKNOWN, - ): - # ensure 1d array - dim_sel = np.asanyarray(dim_sel) - if not is_integer_array(dim_sel, 1): - raise IndexError( - "integer arrays in an orthogonal selection must be " "1-dimensional only" - ) - - # handle wraparound - if wraparound: - wraparound_indices(dim_sel, dim_len) - - # handle out of bounds - if boundscheck: - boundscheck_indices(dim_sel, dim_len) - - # store attributes - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) - self.nitems = len(dim_sel) - - # determine which chunk is needed for each selection item - # note: for dense integer selections, the division operation here is the - # bottleneck - dim_sel_chunk = dim_sel // dim_chunk_len - - # determine order of indices - if order == Order.UNKNOWN: - order = Order.check(dim_sel) - self.order = order - - if self.order == Order.INCREASING: - self.dim_sel = dim_sel - self.dim_out_sel = None - elif self.order == Order.DECREASING: - self.dim_sel = dim_sel[::-1] - # TODO should be possible to do this without creating an arange - self.dim_out_sel = np.arange(self.nitems - 1, -1, -1) - else: - # sort indices to group by chunk - self.dim_out_sel = np.argsort(dim_sel_chunk) - self.dim_sel = np.take(dim_sel, self.dim_out_sel) - - # precompute number of selected items for each chunk - self.chunk_nitems = 
np.bincount(dim_sel_chunk, minlength=self.nchunks) - - # find chunks that we need to visit - self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] - - # compute offsets into the output array - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - - def __iter__(self): - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in output - if dim_chunk_ix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] - stop = self.chunk_nitems_cumsum[dim_chunk_ix] - if self.order == Order.INCREASING: - dim_out_sel = slice(start, stop) - else: - dim_out_sel = self.dim_out_sel[start:stop] - - # find region in chunk - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[start:stop] - dim_offset - - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -def slice_to_range(s: slice, l: int): # noqa: E741 - return range(*s.indices(l)) - - -def ix_(selection, shape): - """Convert an orthogonal selection to a numpy advanced (fancy) selection, like numpy.ix_ - but with support for slices and single ints.""" - - # normalisation - selection = replace_ellipsis(selection, shape) - - # replace slice and int as these are not supported by numpy.ix_ - selection = [ - ( - slice_to_range(dim_sel, dim_len) - if isinstance(dim_sel, slice) - else [dim_sel] if is_integer(dim_sel) else dim_sel - ) - for dim_sel, dim_len in zip(selection, shape) - ] - - # now get numpy to convert to a coordinate selection - selection = np.ix_(*selection) - - return selection - - -def oindex(a, selection): - """Implementation of orthogonal indexing with slices and ints.""" - selection = replace_ellipsis(selection, a.shape) - drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) - selection = ix_(selection, a.shape) - result = a[selection] - if drop_axes: - result = result.squeeze(axis=drop_axes) - return result - - -def oindex_set(a, selection, value): - selection = replace_ellipsis(selection, a.shape) - drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) - selection = ix_(selection, a.shape) - if not np.isscalar(value) and drop_axes: - value = np.asanyarray(value) - value_selection = [slice(None)] * len(a.shape) - for i in drop_axes: - value_selection[i] = np.newaxis - value_selection = tuple(value_selection) - value = value[value_selection] - a[selection] = value - - -# noinspection PyProtectedMember -class OrthogonalIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # normalize list to array - selection = replace_lists(selection) - - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif isinstance(dim_sel, slice): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_integer_array(dim_sel): - dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_bool_array(dim_sel): - dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) - - else: - raise IndexError( - f"unsupported selection item for orthogonal indexing; " - f"expected integer, slice, integer array or Boolean " - f"array, got {type(dim_sel)!r}" - ) - - dim_indexers.append(dim_indexer) - - self.array = array - self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) - self.is_advanced = not 
is_basic_selection(selection) - if self.is_advanced: - self.drop_axes = tuple( - i - for i, dim_indexer in enumerate(self.dim_indexers) - if isinstance(dim_indexer, IntDimIndexer) - ) - else: - self.drop_axes = None - - def __iter__(self): - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None - ) - - # handle advanced indexing arrays orthogonally - if self.is_advanced: - # N.B., numpy doesn't support orthogonal indexing directly as yet, - # so need to work around via np.ix_. Also np.ix_ does not support a - # mixture of arrays and slices or integers, so need to convert slices - # and integers into ranges. - chunk_selection = ix_(chunk_selection, self.array._chunks) - - # special case for non-monotonic indices - if not is_basic_selection(out_selection): - out_selection = ix_(out_selection, self.shape) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class OIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.get_orthogonal_selection(selection, fields=fields) - - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.set_orthogonal_selection(selection, value, fields=fields) - - -# noinspection PyProtectedMember -class BlockIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # normalize list to array - selection = replace_lists(selection) - - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_size in zip(selection, array._shape, array._chunks): - dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) - - if is_integer(dim_sel): - if dim_sel < 0: - dim_sel = dim_numchunks + dim_sel - - start = dim_sel * dim_chunk_size - stop = start + dim_chunk_size - slice_ = slice(start, stop) - - elif is_slice(dim_sel): - start = dim_sel.start if dim_sel.start is not None else 0 - stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks - - if dim_sel.step not in {1, None}: - raise IndexError( - f"unsupported selection item for block indexing; " - f"expected integer or slice with step=1, got {type(dim_sel)!r}" - ) - - # Can't reuse wraparound_indices because it expects a numpy array - # We have integers here. 
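``OIndex`` above is the object exposed as an array's ``oindex`` property in zarr 2.x (the property itself is defined in ``zarr/core.py``, not shown here); it forwards to ``get_orthogonal_selection``/``set_orthogonal_selection``. A hedged usage sketch:

    import numpy as np
    import zarr

    z = zarr.array(np.arange(100).reshape(10, 10), chunks=(5, 5))
    z.oindex[[0, 2, 5], [1, 3]]        # outer selection: 3 rows x 2 columns -> shape (3, 2)
    z.oindex[[0, 2, 5], [1, 3]] = -1   # orthogonal assignment along the same axes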
- if start < 0: - start = dim_numchunks + start - if stop < 0: - stop = dim_numchunks + stop - - start = start * dim_chunk_size - stop = stop * dim_chunk_size - slice_ = slice(start, stop) - - else: - raise IndexError( - f"unsupported selection item for block indexing; " - f"expected integer or slice, got {type(dim_sel)!r}" - ) - - dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) - dim_indexers.append(dim_indexer) - - if start >= dim_len or start < 0: - raise BoundsCheckError(dim_len) - - self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers) - self.drop_axes = None - - def __iter__(self): - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None - ) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class BlockIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.get_block_selection(selection, fields=fields) - - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.set_block_selection(selection, value, fields=fields) - - -# noinspection PyProtectedMember -def is_coordinate_selection(selection, array): - return (len(selection) == len(array._shape)) and all( - is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection - ) - - -# noinspection PyProtectedMember -def is_mask_selection(selection, array): - return ( - len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == array._shape - ) - - -# noinspection PyProtectedMember -class CoordinateIndexer: - def __init__(self, selection, array): - # some initial normalization - selection = ensure_tuple(selection) - selection = tuple([i] if is_integer(i) else i for i in selection) - selection = replace_lists(selection) - - # validation - if not is_coordinate_selection(selection, array): - raise IndexError( - f"invalid coordinate selection; expected one integer " - f"(coordinate) array per dimension of the target array, " - f"got {selection!r}" - ) - - # handle wraparound, boundscheck - for dim_sel, dim_len in zip(selection, array.shape): - # handle wraparound - wraparound_indices(dim_sel, dim_len) - - # handle out of bounds - boundscheck_indices(dim_sel, dim_len) - - # compute chunk index for each point in the selection - chunks_multi_index = tuple( - dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) - ) - - # broadcast selection - this will raise error if array dimensions don't match - selection = np.broadcast_arrays(*selection) - chunks_multi_index = np.broadcast_arrays(*chunks_multi_index) - - # remember shape of selection, because we will flatten indices for processing - self.sel_shape = selection[0].shape if selection[0].shape else (1,) - - # flatten selection - selection = [dim_sel.reshape(-1) for dim_sel in selection] - chunks_multi_index = [dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index] - - # ravel chunk indices - chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, dims=array._cdata_shape) - - # group points by chunk - if 
np.any(np.diff(chunks_raveled_indices) < 0): - # optimisation, only sort if needed - sel_sort = np.argsort(chunks_raveled_indices) - selection = tuple(dim_sel[sel_sort] for dim_sel in selection) - else: - sel_sort = None - - # store attributes - self.selection = selection - self.sel_sort = sel_sort - self.shape = selection[0].shape if selection[0].shape else (1,) - self.drop_axes = None - self.array = array - - # precompute number of selected items for each chunk - self.chunk_nitems = np.bincount(chunks_raveled_indices, minlength=array.nchunks) - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - # locate the chunks we need to process - self.chunk_rixs = np.nonzero(self.chunk_nitems)[0] - - # unravel chunk indices - self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) - - def __iter__(self): - # iterate over chunks - for i, chunk_rix in enumerate(self.chunk_rixs): - chunk_coords = tuple(m[i] for m in self.chunk_mixs) - if chunk_rix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[chunk_rix - 1] - stop = self.chunk_nitems_cumsum[chunk_rix] - if self.sel_sort is None: - out_selection = slice(start, stop) - else: - out_selection = self.sel_sort[start:stop] - - chunk_offsets = tuple( - dim_chunk_ix * dim_chunk_len - for dim_chunk_ix, dim_chunk_len in zip(chunk_coords, self.array._chunks) - ) - chunk_selection = tuple( - dim_sel[start:stop] - dim_chunk_offset - for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets) - ) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -# noinspection PyProtectedMember -class MaskIndexer(CoordinateIndexer): - def __init__(self, selection, array): - # some initial normalization - selection = ensure_tuple(selection) - selection = replace_lists(selection) - - # validation - if not is_mask_selection(selection, array): - raise IndexError( - f"invalid mask selection; expected one Boolean (mask)" - f"array with the same shape as the target array, got {selection!r}" - ) - - # convert to indices - selection = np.nonzero(selection[0]) - - # delegate the rest to superclass - super().__init__(selection, array) - - -class VIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - if is_coordinate_selection(selection, self.array): - return self.array.get_coordinate_selection(selection, fields=fields) - elif is_mask_selection(selection, self.array): - return self.array.get_mask_selection(selection, fields=fields) - else: - raise VindexInvalidSelectionError(selection) - - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - if is_coordinate_selection(selection, self.array): - self.array.set_coordinate_selection(selection, value, fields=fields) - elif is_mask_selection(selection, self.array): - self.array.set_mask_selection(selection, value, fields=fields) - else: - raise VindexInvalidSelectionError(selection) - - -def check_fields(fields, dtype): - # early out - if fields is None: - return dtype - # check type - if not isinstance(fields, (str, list, tuple)): - raise IndexError( - f"'fields' argument must be a string or list of strings; found " f"{type(fields)!r}" - ) - if fields: - if dtype.names is None: - raise IndexError("invalid 'fields' argument, array does not have any fields") - try: - if isinstance(fields, str): - # single 
field selection - out_dtype = dtype[fields] - else: - # multiple field selection - out_dtype = np.dtype([(f, dtype[f]) for f in fields]) - except KeyError as e: - raise IndexError(f"invalid 'fields' argument, field not found: {e!r}") from e - else: - return out_dtype - else: - return dtype - - -def check_no_multi_fields(fields): - if isinstance(fields, list): - if len(fields) == 1: - return fields[0] - elif len(fields) > 1: - raise IndexError("multiple fields are not supported for this operation") - return fields - - -def pop_fields(selection): - if isinstance(selection, str): - # single field selection - fields = selection - selection = () - elif not isinstance(selection, tuple): - # single selection item, no fields - fields = None - # leave selection as-is - else: - # multiple items, split fields from selection items - fields = [f for f in selection if isinstance(f, str)] - fields = fields[0] if len(fields) == 1 else fields - selection = tuple(s for s in selection if not isinstance(s, str)) - selection = selection[0] if len(selection) == 1 else selection - return fields, selection - - -def make_slice_selection(selection): - ls = [] - for dim_selection in selection: - if is_integer(dim_selection): - ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) - elif isinstance(dim_selection, np.ndarray): - if len(dim_selection) == 1: - ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) - else: - raise ArrayIndexError() - else: - ls.append(dim_selection) - return ls - - -class PartialChunkIterator: - """Iterator to retrieve the specific coordinates of requested data - from within a compressed chunk. - - Parameters - ---------- - selection : tuple - tuple of slice objects to take from the chunk - arr_shape : shape of chunk to select data from - - Attributes - ----------- - arr_shape - selection - - Returns - ------- - Tuple with 3 elements: - - start: int - elements offset in the chunk to read from - nitems: int - number of elements to read in the chunk from start - partial_out_selection: list of slices - indices of a temporary empty array of size `Array._chunks` to assign - the decompressed data to after the partial read. - - Notes - ----- - An array is flattened when compressed with blosc, so this iterator takes - the wanted selection of an array and determines the wanted coordinates - of the flattened, compressed data to be read and then decompressed. The - decompressed data is then placed in a temporary empty array of size - `Array._chunks` at the indices yielded as partial_out_selection. - Once all the slices yielded by this iterator have been read, decompressed - and written to the temporary array, the wanted slice of the chunk can be - indexed from the temporary array and written to the out_selection slice - of the out array. 
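
    Examples
    --------
    An illustrative sketch (assuming this class stays importable from
    ``zarr.indexing``): selecting the first five columns of the first two rows
    of a ``(5, 10)`` chunk yields one contiguous read per row of the flattened
    chunk:

    >>> from zarr.indexing import PartialChunkIterator
    >>> pci = PartialChunkIterator((slice(0, 2, 1), slice(0, 5, 1)), (5, 10))
    >>> for start, nitems, _ in pci:
    ...     print(int(start), int(nitems))
    0 5
    10 5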
- - """ - - def __init__(self, selection, arr_shape): - selection = make_slice_selection(selection) - self.arr_shape = arr_shape - - # number of selection dimensions can't be greater than the number of chunk dimensions - if len(selection) > len(self.arr_shape): - raise ValueError( - "Selection has more dimensions then the array:\n" - f"selection dimensions = {len(selection)}\n" - f"array dimensions = {len(self.arr_shape)}" - ) - - # any selection can not be out of the range of the chunk - selection_shape = np.empty(self.arr_shape)[tuple(selection)].shape - if any( - selection_dim < 0 or selection_dim > arr_dim - for selection_dim, arr_dim in zip(selection_shape, self.arr_shape) - ): - raise IndexError( - "a selection index is out of range for the dimension" - ) # pragma: no cover - - for i, dim_size in enumerate(self.arr_shape[::-1]): - index = len(self.arr_shape) - (i + 1) - if index <= len(selection) - 1: - slice_size = selection_shape[index] - if slice_size == dim_size and index > 0: - selection.pop() - else: - break - - chunk_loc_slices = [] - last_dim_slice = None if selection[-1].step > 1 else selection.pop() - for arr_shape_i, sl in zip(arr_shape, selection): - dim_chunk_loc_slices = [] - assert isinstance(sl, slice) - for x in slice_to_range(sl, arr_shape_i): - dim_chunk_loc_slices.append(slice(x, x + 1, 1)) - chunk_loc_slices.append(dim_chunk_loc_slices) - if last_dim_slice: - chunk_loc_slices.append([last_dim_slice]) - self.chunk_loc_slices = list(itertools.product(*chunk_loc_slices)) - - def __iter__(self): - chunk1 = self.chunk_loc_slices[0] - nitems = (chunk1[-1].stop - chunk1[-1].start) * np.prod( - self.arr_shape[len(chunk1) :], dtype=int - ) - for partial_out_selection in self.chunk_loc_slices: - start = 0 - for i, sl in enumerate(partial_out_selection): - start += sl.start * np.prod(self.arr_shape[i + 1 :], dtype=int) - yield start, nitems, partial_out_selection diff --git a/zarr/meta.py b/zarr/meta.py deleted file mode 100644 index 44a2b7ebec..0000000000 --- a/zarr/meta.py +++ /dev/null @@ -1,580 +0,0 @@ -import base64 -import itertools -from collections.abc import Mapping - -import numcodecs -import numpy as np -from numcodecs.abc import Codec - -from zarr.errors import MetadataError -from zarr.util import json_dumps, json_loads - -from typing import cast, Union, Any, List, Mapping as MappingType, Optional, TYPE_CHECKING - -if TYPE_CHECKING: # pragma: no cover - from zarr._storage.store import StorageTransformer - - -ZARR_FORMAT = 2 -ZARR_FORMAT_v3 = 3 - -# FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} - -_default_entry_point_metadata_v3 = { - "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0", - "metadata_encoding": "https://purl.org/zarr/spec/protocol/core/3.0", - "metadata_key_suffix": ".json", - "extensions": [], -} - -_v3_core_types = set("".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8"))) -_v3_core_types = {"bool", "i1", "u1"} | _v3_core_types - -# The set of complex types allowed ({"c8", ">c16"}) -_v3_complex_types = set(f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16"))) - -# All dtype.str values corresponding to datetime64 and timedelta64 -# see: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units -_date_units = ["Y", "M", "W", "D"] -_time_units = ["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] -_v3_datetime_types = set( - f"{end}{kind}8[{unit}]" - for end, unit, kind in itertools.product("<>", _date_units + _time_units, ("m", "M")) -) - - 
-def get_extended_dtype_info(dtype) -> dict: - if dtype.str in _v3_complex_types: - return dict( - extension="https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/complex-dtypes/v1.0.html", # noqa - type=dtype.str, - fallback=None, - ) - elif dtype.str == "|O": - return dict( - extension="TODO: object array protocol URL", # noqa - type=dtype.str, - fallback=None, - ) - elif dtype.str.startswith("|S"): - return dict( - extension="TODO: bytestring array protocol URL", # noqa - type=dtype.str, - fallback=None, - ) - elif dtype.str.startswith("U"): - return dict( - extension="TODO: unicode array protocol URL", # noqa - type=dtype.str, - fallback=None, - ) - elif dtype.str.startswith("|V"): - return dict( - extension="TODO: structured array protocol URL", # noqa - type=dtype.descr, - fallback=None, - ) - elif dtype.str in _v3_datetime_types: - return dict( - extension="https://zarr-specs.readthedocs.io/en/latest/extensions/data-types/datetime/v1.0.html", # noqa - type=dtype.str, - fallback=None, - ) - else: - raise ValueError(f"Unsupported dtype: {dtype}") - - -class Metadata2: - ZARR_FORMAT = ZARR_FORMAT - - @classmethod - def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - # Here we allow that a store may return an already-parsed metadata object, - # or a string of JSON that we will parse here. We allow for an already-parsed - # object to accommodate a consolidated metadata store, where all the metadata for - # all groups and arrays will already have been parsed from JSON. - - if isinstance(s, Mapping): - # assume metadata has already been parsed into a mapping object - meta = s - - else: - # assume metadata needs to be parsed as JSON - meta = json_loads(s) - - return meta - - @classmethod - def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - - # check metadata format - zarr_format = meta.get("zarr_format", None) - if zarr_format != cls.ZARR_FORMAT: - raise MetadataError(f"unsupported zarr format: {zarr_format}") - - # extract array metadata fields - try: - dtype = cls.decode_dtype(meta["dtype"]) - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["filters"][0]) - else: - object_codec = None - - dimension_separator = meta.get("dimension_separator", None) - fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) - meta = dict( - zarr_format=meta["zarr_format"], - shape=tuple(meta["shape"]), - chunks=tuple(meta["chunks"]), - dtype=dtype, - compressor=meta["compressor"], - fill_value=fill_value, - order=meta["order"], - filters=meta["filters"], - ) - if dimension_separator: - meta["dimension_separator"] = dimension_separator - except Exception as e: - raise MetadataError("error decoding metadata") from e - else: - return meta - - @classmethod - def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: - dtype = meta["dtype"] - sdshape = () - if dtype.subdtype is not None: - dtype, sdshape = dtype.subdtype - - dimension_separator = meta.get("dimension_separator") - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["filters"][0]) - else: - object_codec = None - - meta = dict( - zarr_format=cls.ZARR_FORMAT, - shape=meta["shape"] + sdshape, - chunks=meta["chunks"], - dtype=cls.encode_dtype(dtype), - compressor=meta["compressor"], - fill_value=cls.encode_fill_value(meta["fill_value"], dtype, object_codec), - order=meta["order"], - filters=meta["filters"], - ) - if 
dimension_separator: - meta["dimension_separator"] = dimension_separator - - return json_dumps(meta) - - @classmethod - def encode_dtype(cls, d: np.dtype): - if d.fields is None: - return d.str - else: - return d.descr - - @classmethod - def _decode_dtype_descr(cls, d) -> List[Any]: - # need to convert list of lists to list of tuples - if isinstance(d, list): - # recurse to handle nested structures - d = [(k[0], cls._decode_dtype_descr(k[1])) + tuple(k[2:]) for k in d] - return d - - @classmethod - def decode_dtype(cls, d) -> np.dtype: - d = cls._decode_dtype_descr(d) - return np.dtype(d) - - @classmethod - def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - - # check metadata format version - zarr_format = meta.get("zarr_format", None) - if zarr_format != cls.ZARR_FORMAT: - raise MetadataError(f"unsupported zarr format: {zarr_format}") - - meta = dict(zarr_format=zarr_format) - return meta - - # N.B., keep `meta` parameter as a placeholder for future - # noinspection PyUnusedLocal - @classmethod - def encode_group_metadata(cls, meta=None) -> bytes: - meta = dict(zarr_format=cls.ZARR_FORMAT) - return json_dumps(meta) - - @classmethod - def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: - # early out - if v is None: - return v - if dtype.kind == "V" and dtype.hasobject: - if object_codec is None: - raise ValueError("missing object_codec for object array") - v = base64.standard_b64decode(v) - v = object_codec.decode(v) - v = np.array(v, dtype=dtype)[()] - return v - if dtype.kind == "f": - if v == "NaN": - return np.nan - elif v == "Infinity": - return np.inf - elif v == "-Infinity": - return -np.inf - else: - return np.array(v, dtype=dtype)[()] - elif dtype.kind in "c": - v = ( - cls.decode_fill_value(v[0], dtype.type().real.dtype), - cls.decode_fill_value(v[1], dtype.type().imag.dtype), - ) - v = v[0] + 1j * v[1] - return np.array(v, dtype=dtype)[()] - elif dtype.kind == "S": - # noinspection PyBroadException - try: - v = base64.standard_b64decode(v) - except Exception: - # be lenient, allow for other values that may have been used before base64 - # encoding and may work as fill values, e.g., the number 0 - pass - v = np.array(v, dtype=dtype)[()] - return v - elif dtype.kind == "V": - v = base64.standard_b64decode(v) - v = np.array(v, dtype=dtype.str).view(dtype)[()] - return v - elif dtype.kind == "U": - # leave as-is - return v - else: - return np.array(v, dtype=dtype)[()] - - @classmethod - def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: - # early out - if v is None: - return v - if dtype.kind == "V" and dtype.hasobject: - if object_codec is None: - raise ValueError("missing object_codec for object array") - v = object_codec.encode(v) - v = str(base64.standard_b64encode(v), "ascii") - return v - if dtype.kind == "f": - if np.isnan(v): - return "NaN" - elif np.isposinf(v): - return "Infinity" - elif np.isneginf(v): - return "-Infinity" - else: - return float(v) - elif dtype.kind in "ui": - return int(v) - elif dtype.kind == "b": - return bool(v) - elif dtype.kind in "c": - c = cast(np.complex128, np.dtype(complex).type()) - v = ( - cls.encode_fill_value(v.real, c.real.dtype, object_codec), - cls.encode_fill_value(v.imag, c.imag.dtype, object_codec), - ) - return v - elif dtype.kind in "SV": - v = str(base64.standard_b64encode(v), "ascii") - return v - elif dtype.kind == "U": - return v - elif dtype.kind in "mM": - return int(v.view("i8")) - 
else: - return v - - -class Metadata3(Metadata2): - ZARR_FORMAT = ZARR_FORMAT_v3 - - @classmethod - def decode_dtype(cls, d, validate=True): - if isinstance(d, dict): - # extract the type from the extension info - try: - d = d["type"] - except KeyError as e: - raise KeyError("Extended dtype info must provide a key named 'type'.") from e - d = cls._decode_dtype_descr(d) - dtype = np.dtype(d) - if validate: - if dtype.str in (_v3_core_types | {"|b1", "|u1", "|i1"}): - # it is a core dtype of the v3 spec - pass - else: - # will raise if this is not a recognized extended dtype - get_extended_dtype_info(dtype) - return dtype - - @classmethod - def encode_dtype(cls, d): - s = d.str - if s == "|b1": - return "bool" - elif s == "|u1": - return "u1" - elif s == "|i1": - return "i1" - elif s in _v3_core_types: - return Metadata2.encode_dtype(d) - else: - # Check if this dtype corresponds to a supported extension to - # the v3 protocol. - return get_extended_dtype_info(np.dtype(d)) - - @classmethod - def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - # 1 / 0 - # # check metadata format version - # zarr_format = meta.get("zarr_format", None) - # if zarr_format != cls.ZARR_FORMAT: - # raise MetadataError(f"unsupported zarr format: {zarr_format}") - - assert "attributes" in meta - # meta = dict(attributes=meta['attributes']) - return meta - - # return json.loads(s) - - @classmethod - def encode_group_metadata(cls, meta=None) -> bytes: - # The ZARR_FORMAT should not be in the group metadata, but in the - # entry point metadata instead - # meta = dict(zarr_format=cls.ZARR_FORMAT) - if meta is None: - meta = {"attributes": {}} - meta = dict(attributes=meta.get("attributes", {})) - return json_dumps(meta) - - @classmethod - def encode_hierarchy_metadata(cls, meta=None) -> bytes: - if meta is None: - meta = _default_entry_point_metadata_v3 - elif set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", - }: - raise ValueError(f"Unexpected keys in metadata. meta={meta}") - return json_dumps(meta) - - @classmethod - def decode_hierarchy_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - # check metadata format - # zarr_format = meta.get("zarr_format", None) - # if zarr_format != "https://purl.org/zarr/spec/protocol/core/3.0": - # raise MetadataError(f"unsupported zarr format: {zarr_format}") - if set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", - }: - raise ValueError(f"Unexpected keys in metadata. 
meta={meta}") - return meta - - @classmethod - def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]: - if codec is None: - return None - - # only support gzip for now - config = codec.get_config() - del config["id"] - uri = "https://purl.org/zarr/spec/codec/" - if isinstance(codec, numcodecs.GZip): - uri = uri + "gzip/1.0" - elif isinstance(codec, numcodecs.Zlib): - uri = uri + "zlib/1.0" - elif isinstance(codec, numcodecs.Blosc): - uri = uri + "blosc/1.0" - elif isinstance(codec, numcodecs.BZ2): - uri = uri + "bz2/1.0" - elif isinstance(codec, numcodecs.LZ4): - uri = uri + "lz4/1.0" - elif isinstance(codec, numcodecs.LZMA): - uri = uri + "lzma/1.0" - elif isinstance(codec, numcodecs.Zstd): - uri = uri + "zstd/1.0" - meta = { - "codec": uri, - "configuration": config, - } - return meta - - @classmethod - def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: - if meta is None: - return None - - uri = "https://purl.org/zarr/spec/codec/" - conf = meta["configuration"] - if meta["codec"].startswith(uri + "gzip/"): - conf["id"] = "gzip" - elif meta["codec"].startswith(uri + "zlib/"): - conf["id"] = "zlib" - elif meta["codec"].startswith(uri + "blosc/"): - conf["id"] = "blosc" - elif meta["codec"].startswith(uri + "bz2/"): - conf["id"] = "bz2" - elif meta["codec"].startswith(uri + "lz4/"): - conf["id"] = "lz4" - elif meta["codec"].startswith(uri + "lzma/"): - conf["id"] = "lzma" - elif meta["codec"].startswith(uri + "zstd/"): - conf["id"] = "zstd" - else: - raise NotImplementedError - - codec = numcodecs.get_codec(conf) - - return codec - - @classmethod - def _encode_storage_transformer_metadata( - cls, storage_transformer: "StorageTransformer" - ) -> Optional[Mapping]: - return { - "extension": storage_transformer.extension_uri, - "type": storage_transformer.type, - "configuration": storage_transformer.get_config(), - } - - @classmethod - def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransformer": - from zarr.tests.test_storage_v3 import DummyStorageTransfomer - from zarr._storage.v3_storage_transformers import ShardingStorageTransformer - - # This might be changed to a proper registry in the future - KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer, ShardingStorageTransformer] - - conf = meta.get("configuration", {}) - extension_uri = meta["extension"] - transformer_type = meta["type"] - - for StorageTransformerCls in KNOWN_STORAGE_TRANSFORMERS: - if StorageTransformerCls.extension_uri == extension_uri: - break - else: # pragma: no cover - raise NotImplementedError - - return StorageTransformerCls.from_config(transformer_type, conf) - - @classmethod - def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - - # extract array metadata fields - try: - dtype = cls.decode_dtype(meta["data_type"]) - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) - else: - object_codec = None - fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) - # TODO: remove dimension_separator? 
- - compressor = cls._decode_codec_metadata(meta.get("compressor", None)) - storage_transformers = meta.get("storage_transformers", ()) - storage_transformers = [ - cls._decode_storage_transformer_metadata(i) for i in storage_transformers - ] - extensions = meta.get("extensions", []) - meta = dict( - shape=tuple(meta["shape"]), - chunk_grid=dict( - type=meta["chunk_grid"]["type"], - chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), - separator=meta["chunk_grid"]["separator"], - ), - data_type=dtype, - fill_value=fill_value, - chunk_memory_layout=meta["chunk_memory_layout"], - attributes=meta["attributes"], - extensions=extensions, - ) - # compressor field should be absent when there is no compression - if compressor: - meta["compressor"] = compressor - if storage_transformers: - meta["storage_transformers"] = storage_transformers - - except Exception as e: - raise MetadataError(f"error decoding metadata: {e}") from e - else: - return meta - - @classmethod - def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: - dtype = meta["data_type"] - sdshape = () - if dtype.subdtype is not None: - dtype, sdshape = dtype.subdtype - dimension_separator = meta.get("dimension_separator") - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) - else: - object_codec = None - - compressor = cls._encode_codec_metadata(meta.get("compressor", None)) - storage_transformers = meta.get("storage_transformers", ()) - storage_transformers = [ - cls._encode_storage_transformer_metadata(i) for i in storage_transformers - ] - extensions = meta.get("extensions", []) - meta = dict( - shape=meta["shape"] + sdshape, - chunk_grid=dict( - type=meta["chunk_grid"]["type"], - chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), - separator=meta["chunk_grid"]["separator"], - ), - data_type=cls.encode_dtype(dtype), - fill_value=encode_fill_value(meta["fill_value"], dtype, object_codec), - chunk_memory_layout=meta["chunk_memory_layout"], - attributes=meta.get("attributes", {}), - extensions=extensions, - ) - if compressor: - meta["compressor"] = compressor - if dimension_separator: - meta["dimension_separator"] = dimension_separator - if storage_transformers: - meta["storage_transformers"] = storage_transformers - return json_dumps(meta) - - -parse_metadata = Metadata2.parse_metadata -decode_array_metadata = Metadata2.decode_array_metadata -encode_array_metadata = Metadata2.encode_array_metadata -encode_dtype = Metadata2.encode_dtype -_decode_dtype_descr = Metadata2._decode_dtype_descr -decode_dtype = Metadata2.decode_dtype -decode_group_metadata = Metadata2.decode_group_metadata -encode_group_metadata = Metadata2.encode_group_metadata -decode_fill_value = Metadata2.decode_fill_value -encode_fill_value = Metadata2.encode_fill_value diff --git a/zarr/meta_v1.py b/zarr/meta_v1.py deleted file mode 100644 index 714f55f477..0000000000 --- a/zarr/meta_v1.py +++ /dev/null @@ -1,64 +0,0 @@ -import json - -import numpy as np - -from zarr.errors import MetadataError - - -def decode_metadata(b): - s = str(b, "ascii") - meta = json.loads(s) - zarr_format = meta.get("zarr_format", None) - if zarr_format != 1: - raise MetadataError(f"unsupported zarr format: {zarr_format}") - try: - meta = dict( - zarr_format=meta["zarr_format"], - shape=tuple(meta["shape"]), - chunks=tuple(meta["chunks"]), - dtype=decode_dtype(meta["dtype"]), - compression=meta["compression"], - compression_opts=meta["compression_opts"], - fill_value=meta["fill_value"], - order=meta["order"], - ) 
- except Exception as e: - raise MetadataError(f"error decoding metadata: {e}") from e - else: - return meta - - -def encode_metadata(meta): - meta = dict( - zarr_format=1, - shape=meta["shape"], - chunks=meta["chunks"], - dtype=encode_dtype(meta["dtype"]), - compression=meta["compression"], - compression_opts=meta["compression_opts"], - fill_value=meta["fill_value"], - order=meta["order"], - ) - s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True) - b = s.encode("ascii") - return b - - -def encode_dtype(d): - if d.fields is None: - return d.str - else: - return d.descr - - -def _decode_dtype_descr(d): - # need to convert list of lists to list of tuples - if isinstance(d, list): - # recurse to handle nested structures - d = [(f, _decode_dtype_descr(v)) for f, v in d] - return d - - -def decode_dtype(d): - d = _decode_dtype_descr(d) - return np.dtype(d) diff --git a/zarr/n5.py b/zarr/n5.py deleted file mode 100644 index 3bb7093128..0000000000 --- a/zarr/n5.py +++ /dev/null @@ -1,932 +0,0 @@ -"""This module contains a storage class and codec to support the N5 format. -""" - -import os -import struct -import sys -from typing import Any, Dict, Optional, cast -import warnings - -import numpy as np -from numcodecs.abc import Codec -from numcodecs.compat import ndarray_copy -from numcodecs.registry import get_codec, register_codec - -from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import FSStore -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path -from .storage import array_meta_key as zarr_array_meta_key -from .storage import attrs_key as zarr_attrs_key -from .storage import group_meta_key as zarr_group_meta_key - -N5_FORMAT = "2.0.0" - -zarr_to_n5_keys = [ - ("chunks", "blockSize"), - ("dtype", "dataType"), - ("compressor", "compression"), - ("shape", "dimensions"), -] -n5_attrs_key = "attributes.json" -n5_keywords = ["n5", "dataType", "dimensions", "blockSize", "compression"] - - -class N5Store(NestedDirectoryStore): - """Storage class using directories and files on a standard file system, - following the N5 format (https://github.com/saalfeldlab/n5). - - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5Store('data/array.n5') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Store a group:: - - >>> store = zarr.N5Store('data/group.n5') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - - This is an experimental feature. - - Safe to write in multiple threads or processes. - - .. deprecated:: 2.18.3 - `N5Store` will be removed in Zarr 3.0.0. 
- """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - warnings.warn( - "The N5Store is deprecated and will be removed in a Zarr-Python version 3, " - "see https://github.com/zarr-developers/zarr-python/issues/1274 and " - "https://github.com/zarr-developers/n5py for more information.", - FutureWarning, - stacklevel=2, - ) - - def __getitem__(self, key: str) -> bytes: - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - top_level = key == zarr_array_meta_key - value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - value = attrs_to_zarr(self._load_n5_attrs(key_new)) - - if len(value) == 0: - raise KeyError(key_new) - else: - return json_dumps(value) - - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - - else: - key_new = key - - return super().__getitem__(key_new) - - def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - top_level = key == zarr_array_meta_key - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**array_metadata_to_n5(json_loads(value), top_level=top_level)) - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs: - warnings.warn( - f"Attribute {k} is a reserved N5 keyword", - UserWarning, - stacklevel=2, - ) - - # remove previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) - - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - - else: - key_new = key - - super().__setitem__(key_new, value) - - def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - else: - key_new = key - - super().__delitem__(key_new) - - def __contains__(self, key): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - if key_new not in self: - return False - # group if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - return self._contains_attrs(key_new) - 
- elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - else: - key_new = key - - return super().__contains__(key_new) - - def __eq__(self, other): - return isinstance(other, N5Store) and self.path == other.path - - def listdir(self, path: Optional[str] = None): - if path is not None: - path = invert_chunk_coords(path) - path = cast(str, path) - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. - children = super().listdir(path=path) - - if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(n5_attrs_key) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) - - return sorted(new_children) - - elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(n5_attrs_key) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - return sorted(children) - - else: - return children - - def _load_n5_attrs(self, path: str) -> Dict[str, Any]: - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} - - def _is_group(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - attrs_key = os.path.join(path, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - - def _is_array(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - attrs_key = os.path.join(path, n5_attrs_key) - - return "dimensions" in self._load_n5_attrs(attrs_key) - - def _contains_attrs(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - if not path.endswith(n5_attrs_key): - attrs_key = os.path.join(path, n5_attrs_key) - else: - attrs_key = path - - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 - - -class N5FSStore(FSStore): - """Implementation of the N5 format (https://github.com/saalfeldlab/n5) - using `fsspec`, which allows storage on a variety of filesystems. Based - on `zarr.N5Store`. - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] 
= 42 - - Store a group:: - - >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - This is an experimental feature. - Safe to write in multiple threads or processes. - - Be advised that the `_dimension_separator` property of this store - (and arrays it creates) is ".", but chunks saved by this store will - in fact be "/" separated, as proscribed by the N5 format. - - This is counter-intuitive (to say the least), but not arbitrary. - Chunks in N5 format are stored with reversed dimension order - relative to Zarr chunks: a chunk of a 3D Zarr array would be stored - on a file system as `/0/1/2`, but in N5 the same chunk would be - stored as `/2/1/0`. Therefore, stores targeting N5 must intercept - chunk keys and flip the order of the dimensions before writing to - storage, and this procedure requires chunk keys with "." separated - dimensions, hence the Zarr arrays targeting N5 have the deceptive - "." dimension separator. - - .. deprecated:: 2.18.3 - `N5FSStore` will be removed in Zarr 3.0.0. - """ - - _array_meta_key = "attributes.json" - _group_meta_key = "attributes.json" - _attrs_key = "attributes.json" - - def __init__(self, *args, **kwargs): - warnings.warn( - "The N5FSStore is deprecated and will be removed in a Zarr-Python version 3, " - "see https://github.com/zarr-developers/zarr-python/issues/1274 and " - "https://github.com/zarr-developers/n5py for more information.", - FutureWarning, - stacklevel=2, - ) - if "dimension_separator" in kwargs: - kwargs.pop("dimension_separator") - warnings.warn( - "Keyword argument `dimension_separator` will be ignored", - stacklevel=2, - ) - dimension_separator = "." 
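        # N.B. "." is only the in-memory chunk-key separator; chunks are written
        # to storage with reversed, "/"-separated coordinates (see
        # _swap_separator), e.g. "0.1.2" becomes "2/1/0" on disk.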
- super().__init__(*args, dimension_separator=dimension_separator, **kwargs) - - @staticmethod - def _swap_separator(key: str): - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split(".")) - last_segment = "/".join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = "/".join(segments) - return key - - def _normalize_key(self, key: str): - if is_chunk_key(key): - key = invert_chunk_coords(key) - - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace(".", "/") - key = "/".join(bits + [end]) - return key.lower() if self.normalize_keys else key - - def __getitem__(self, key: str) -> bytes: - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - top_level = key == zarr_array_meta_key - value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - value = attrs_to_zarr(self._load_n5_attrs(key_new)) - - if len(value) == 0: - raise KeyError(key_new) - else: - return json_dumps(value) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - - return super().__getitem__(key_new) - - def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - top_level = key == zarr_array_meta_key - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**array_metadata_to_n5(json_loads(value), top_level=top_level)) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs.keys(): - warnings.warn( - f"Attribute {k} is a reserved N5 keyword", - UserWarning, - stacklevel=2, - ) - - # replace previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - - super().__setitem__(key_new, value) - - def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - elif is_chunk_key(key): - key_new = self._swap_separator(key) - else: - key_new = key - super().__delitem__(key_new) - - def __contains__(self, key: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - if 
key_new not in self: - return False - # group if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - return self._contains_attrs(key_new) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - return super().__contains__(key_new) - - def __eq__(self, other: Any): - return isinstance(other, N5FSStore) and self.path == other.path - - def listdir(self, path: Optional[str] = None): - if path is not None: - path = invert_chunk_coords(path) - - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. - children = super().listdir(path=path) - if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(self._array_meta_key) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for file_name in self.fs.find(entry_path): - file_path = os.path.join(root_path, file_name) - rel_path = file_path.split(root_path)[1] - new_child = rel_path.lstrip("/").replace("/", ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) - return sorted(new_children) - - elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(self._group_meta_key) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - return sorted(children) - else: - return children - - def _load_n5_attrs(self, path: str): - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} - - def _is_group(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - attrs_key = os.path.join(path, self._attrs_key) - - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - - def _is_array(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - attrs_key = os.path.join(path, self._attrs_key) - - return "dimensions" in self._load_n5_attrs(attrs_key) - - def _contains_attrs(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - if not path.endswith(self._attrs_key): - attrs_key = os.path.join(path, self._attrs_key) - else: - attrs_key = path - - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 - - -def is_chunk_key(key: str): - rv = False - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - rv = bool(_prog_ckey.match(last_segment)) - return rv - - -def invert_chunk_coords(key: str): - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split(".")) - last_segment = "/".join(coords[::-1]) - segments = 
segments[:-1] + [last_segment] - key = "/".join(segments) - return key - - -def group_metadata_to_n5(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - """Convert group metadata from zarr to N5 format.""" - del group_metadata["zarr_format"] - # TODO: This should only exist at the top-level - group_metadata["n5"] = N5_FORMAT - return group_metadata - - -def group_metadata_to_zarr(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - """Convert group metadata from N5 to zarr format.""" - # This only exists at the top level - group_metadata.pop("n5", None) - group_metadata["zarr_format"] = ZARR_FORMAT - return group_metadata - - -def array_metadata_to_n5(array_metadata: Dict[str, Any], top_level=False) -> Dict[str, Any]: - """Convert array metadata from zarr to N5 format. If the `top_level` keyword argument is True, - then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.""" - - for f, t in zarr_to_n5_keys: - array_metadata[t] = array_metadata.pop(f) - del array_metadata["zarr_format"] - if top_level: - array_metadata["n5"] = N5_FORMAT - try: - dtype = np.dtype(array_metadata["dataType"]) - except TypeError as e: - raise TypeError(f"Data type {array_metadata['dataType']} is not supported by N5") from e - - array_metadata["dataType"] = dtype.name - array_metadata["dimensions"] = array_metadata["dimensions"][::-1] - array_metadata["blockSize"] = array_metadata["blockSize"][::-1] - - if "fill_value" in array_metadata: - if array_metadata["fill_value"] != 0 and array_metadata["fill_value"] is not None: - raise ValueError( - f"""Received fill_value = {array_metadata['fill_value']}, - but N5 only supports fill_value = 0""" - ) - del array_metadata["fill_value"] - - if "order" in array_metadata: - if array_metadata["order"] != "C": - raise ValueError( - f"Received order = {array_metadata['order']}, but N5 only supports order = C" - ) - del array_metadata["order"] - - if "filters" in array_metadata: - if array_metadata["filters"] != [] and array_metadata["filters"] is not None: - raise ValueError("Received filters, but N5 storage does not support zarr filters") - del array_metadata["filters"] - - assert "compression" in array_metadata - compressor_config = array_metadata["compression"] - compressor_config = compressor_config_to_n5(compressor_config) - array_metadata["compression"] = compressor_config - - if "dimension_separator" in array_metadata: - del array_metadata["dimension_separator"] - - return array_metadata - - -def array_metadata_to_zarr( - array_metadata: Dict[str, Any], top_level: bool = False -) -> Dict[str, Any]: - """Convert array metadata from N5 to zarr format. - If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata""" - for t, f in zarr_to_n5_keys: - array_metadata[t] = array_metadata.pop(f) - if top_level: - array_metadata.pop("n5") - array_metadata["zarr_format"] = ZARR_FORMAT - - array_metadata["shape"] = array_metadata["shape"][::-1] - array_metadata["chunks"] = array_metadata["chunks"][::-1] - array_metadata["fill_value"] = 0 # also if None was requested - array_metadata["order"] = "C" - array_metadata["filters"] = [] - array_metadata["dimension_separator"] = "." 
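    # the compressor recorded in the zarr metadata is wrapped in N5ChunkWrapper
    # below, which adds the N5 chunk header and big-endian byte order on top of
    # the inner codec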
- array_metadata["dtype"] = np.dtype(array_metadata["dtype"]).str - - compressor_config = array_metadata["compressor"] - compressor_config = compressor_config_to_zarr(compressor_config) - array_metadata["compressor"] = { - "id": N5ChunkWrapper.codec_id, - "compressor_config": compressor_config, - "dtype": array_metadata["dtype"], - "chunk_shape": array_metadata["chunks"], - } - - return array_metadata - - -def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: - """Get all zarr attributes from an N5 attributes dictionary (i.e., - all non-keyword attributes).""" - - # remove all N5 keywords - for n5_key in n5_keywords: - if n5_key in attrs: - del attrs[n5_key] - - return attrs - - -def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: - if compressor_config is None: - return {"type": "raw"} - else: - _compressor_config = compressor_config - - # peel wrapper, if present - if _compressor_config["id"] == N5ChunkWrapper.codec_id: - _compressor_config = _compressor_config["compressor_config"] - - codec_id = _compressor_config["id"] - n5_config = {"type": codec_id} - - if codec_id == "bz2": - n5_config["type"] = "bzip2" - n5_config["blockSize"] = _compressor_config["level"] - - elif codec_id == "blosc": - n5_config["cname"] = _compressor_config["cname"] - n5_config["clevel"] = _compressor_config["clevel"] - n5_config["shuffle"] = _compressor_config["shuffle"] - n5_config["blocksize"] = _compressor_config["blocksize"] - - elif codec_id == "lzma": - # Switch to XZ for N5 if we are using the default XZ format. - # Note: 4 is the default, which is lzma.CHECK_CRC64. - if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: - n5_config["type"] = "xz" - else: - warnings.warn( - "Not all N5 implementations support lzma compression (yet). You " - "might not be able to open the dataset with another N5 library.", - RuntimeWarning, - stacklevel=2, - ) - n5_config["format"] = _compressor_config["format"] - n5_config["check"] = _compressor_config["check"] - n5_config["filters"] = _compressor_config["filters"] - - # The default is lzma.PRESET_DEFAULT, which is 6. 
- if _compressor_config["preset"]: - n5_config["preset"] = _compressor_config["preset"] - else: - n5_config["preset"] = 6 - - elif codec_id == "zlib": - n5_config["type"] = "gzip" - n5_config["level"] = _compressor_config["level"] - n5_config["useZlib"] = True - - elif codec_id == "gzip": - n5_config["type"] = "gzip" - n5_config["level"] = _compressor_config["level"] - n5_config["useZlib"] = False - - else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) - - return n5_config - - -def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config["type"] - zarr_config = {"id": codec_id} - - if codec_id == "bzip2": - zarr_config["id"] = "bz2" - zarr_config["level"] = compressor_config["blockSize"] - - elif codec_id == "blosc": - zarr_config["cname"] = compressor_config["cname"] - zarr_config["clevel"] = compressor_config["clevel"] - zarr_config["shuffle"] = compressor_config["shuffle"] - zarr_config["blocksize"] = compressor_config["blocksize"] - - elif codec_id == "lzma": - zarr_config["format"] = compressor_config["format"] - zarr_config["check"] = compressor_config["check"] - zarr_config["preset"] = compressor_config["preset"] - zarr_config["filters"] = compressor_config["filters"] - - elif codec_id == "xz": - zarr_config["id"] = "lzma" - zarr_config["format"] = 1 # lzma.FORMAT_XZ - zarr_config["check"] = -1 - zarr_config["preset"] = compressor_config["preset"] - zarr_config["filters"] = None - - elif codec_id == "gzip": - if "useZlib" in compressor_config and compressor_config["useZlib"]: - zarr_config["id"] = "zlib" - zarr_config["level"] = compressor_config["level"] - else: - zarr_config["id"] = "gzip" - zarr_config["level"] = compressor_config["level"] - - elif codec_id == "raw": - return None - - else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) - - return zarr_config - - -class N5ChunkWrapper(Codec): - codec_id = "n5_wrapper" - - def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): - self.dtype = np.dtype(dtype) - self.chunk_shape = tuple(chunk_shape) - # is the dtype a little endian format? 
- self._little_endian = self.dtype.byteorder == "<" or ( - self.dtype.byteorder == "=" and sys.byteorder == "little" - ) - - if compressor: - if compressor_config is not None: - raise ValueError("Only one of compressor_config or compressor should be given.") - compressor_config = compressor.get_config() - - if compressor_config is None and compressor is None or compressor_config["id"] == "raw": - self.compressor_config = None - self._compressor = None - else: - self._compressor = get_codec(compressor_config) - self.compressor_config = self._compressor.get_config() - - def get_config(self): - config = {"id": self.codec_id, "compressor_config": self.compressor_config} - return config - - def encode(self, chunk): - assert chunk.flags.c_contiguous - - header = self._create_header(chunk) - chunk = self._to_big_endian(chunk) - - if self._compressor: - return header + self._compressor.encode(chunk) - else: - return header + chunk.tobytes(order="A") - - def decode(self, chunk, out=None) -> bytes: - len_header, chunk_shape = self._read_header(chunk) - chunk = chunk[len_header:] - - if out is not None: - # out should only be used if we read a complete chunk - assert ( - chunk_shape == self.chunk_shape - ), f"Expected chunk of shape {self.chunk_shape}, found {chunk_shape}" - - if self._compressor: - self._compressor.decode(chunk, out) - else: - ndarray_copy(chunk, out) - - # we can byteswap in-place - if self._little_endian: - out.byteswap(True) - - return out - - else: - if self._compressor: - chunk = self._compressor.decode(chunk) - - # more expensive byteswap - chunk = self._from_big_endian(chunk) - - # read partial chunk - if chunk_shape != self.chunk_shape: - chunk = np.frombuffer(chunk, dtype=self.dtype) - chunk = chunk.reshape(chunk_shape) - complete_chunk = np.zeros(self.chunk_shape, dtype=self.dtype) - target_slices = tuple(slice(0, s) for s in chunk_shape) - complete_chunk[target_slices] = chunk - chunk = complete_chunk - - return chunk - - @staticmethod - def _create_header(chunk): - mode = struct.pack(">H", 0) - num_dims = struct.pack(">H", len(chunk.shape)) - shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) - - return mode + num_dims + shape - - @staticmethod - def _read_header(chunk): - num_dims = struct.unpack(">H", chunk[2:4])[0] - shape = tuple( - struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) - )[::-1] - - len_header = 4 + num_dims * 4 - - return len_header, shape - - def _to_big_endian(self, data): - # assumes data is ndarray - - if self._little_endian: - return data.byteswap() - return data - - def _from_big_endian(self, data): - # assumes data is byte array in big endian - - if not self._little_endian: - return data - - a = np.frombuffer(data, self.dtype.newbyteorder(">")) - return a.astype(self.dtype) - - -register_codec(N5ChunkWrapper, N5ChunkWrapper.codec_id) diff --git a/zarr/storage.py b/zarr/storage.py deleted file mode 100644 index f412870f75..0000000000 --- a/zarr/storage.py +++ /dev/null @@ -1,3080 +0,0 @@ -"""This module contains storage classes for use with Zarr arrays and groups. - -Note that any object implementing the :class:`MutableMapping` interface from the -:mod:`collections` module in the Python standard library can be used as a Zarr -array store, as long as it accepts string (str) keys and bytes values. - -In addition to the :class:`MutableMapping` interface, store classes may also implement -optional methods `listdir` (list members of a "directory") and `rmdir` (remove all -members of a "directory"). 
These methods should be implemented if the store class is -aware of the hierarchical organisation of resources within the store and can provide -efficient implementations. If these methods are not available, Zarr will fall back to -slower implementations that work via the :class:`MutableMapping` interface. Store -classes may also optionally implement a `rename` method (rename all members under a given -path) and a `getsize` method (return the size in bytes of a given value). - -""" - -import atexit -import errno -import glob -import multiprocessing -import operator -import os -import re -import shutil -import sys -import tempfile -import warnings -import zipfile -from collections import OrderedDict -from collections.abc import MutableMapping -from functools import lru_cache -from os import scandir -from pickle import PicklingError -from threading import Lock, RLock -from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any -import uuid -import time - -from numcodecs.abc import Codec -from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like -from numcodecs.registry import codec_registry -from zarr.context import Context -from zarr.types import PathLike as Path, DIMENSION_SEPARATOR -from zarr.util import NoLock - -from zarr.errors import ( - MetadataError, - BadCompressorError, - ContainsArrayError, - ContainsGroupError, - FSPathExistNotDir, - ReadOnlyError, -) -from zarr.meta import encode_array_metadata, encode_group_metadata -from zarr.util import ( - buffer_size, - json_loads, - nolock, - normalize_chunks, - normalize_dimension_separator, - normalize_dtype, - normalize_fill_value, - normalize_order, - normalize_shape, - normalize_storage_path, - retry_call, - ensure_contiguous_ndarray_or_bytes, -) - -from zarr._storage.absstore import ABSStore # noqa: F401 -from zarr._storage.store import ( # noqa: F401 - _get_hierarchy_metadata, - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - DEFAULT_ZARR_VERSION, - BaseStore, - Store, - V3_DEPRECATION_MESSAGE, -) - -__doctest_requires__ = { - ("RedisStore", "RedisStore.*"): ["redis"], - ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], - ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], -} - - -try: - # noinspection PyUnresolvedReferences - from zarr.codecs import Blosc - - default_compressor = Blosc() -except ImportError: # pragma: no cover - from zarr.codecs import Zlib - - default_compressor = Zlib() - - -# allow MutableMapping for backwards compatibility -StoreLike = Union[BaseStore, MutableMapping] - - -def contains_array(store: StoreLike, path: Path = None) -> bool: - """Return True if the store contains an array at the given logical path.""" - path = normalize_storage_path(path) - prefix = _path_to_prefix(path) - key = _prefix_to_array_key(store, prefix) - return key in store - - -def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> bool: - """Return True if the store contains a group at the given logical path.""" - path = normalize_storage_path(path) - prefix = _path_to_prefix(path) - key = _prefix_to_group_key(store, prefix) - store_version = getattr(store, "_store_version", 2) - if store_version == 2 or explicit_only: - return key in store - else: - if key in store: - return True - # for v3, need to also handle implicit groups - - sfx = 
_get_metadata_suffix(store) # type: ignore - implicit_prefix = key.replace(".group" + sfx, "") - if not implicit_prefix.endswith("/"): - implicit_prefix += "/" - if store.list_prefix(implicit_prefix): # type: ignore - return True - return False - - -def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseStore: - # default to v2 store for backward compatibility - zarr_version = getattr(store, "_store_version", 2) - if zarr_version != 2: - raise ValueError("store must be a version 2 store") - if store is None: - store = KVStore(dict()) - return store - if isinstance(store, os.PathLike): - store = os.fspath(store) - if FSStore._fsspec_installed(): - import fsspec - - if isinstance(store, fsspec.FSMap): - return FSStore( - store.root, - fs=store.fs, - mode=mode, - check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {}), - ) - if isinstance(store, str): - if "://" in store or "::" in store: - return FSStore(store, mode=mode, **(storage_options or {})) - elif storage_options: - raise ValueError("storage_options passed with non-fsspec path") - if store.endswith(".zip"): - return ZipStore(store, mode=mode) - elif store.endswith(".n5"): - from zarr.n5 import N5Store - - return N5Store(store) - else: - return DirectoryStore(store) - else: - store = Store._ensure_store(store) - return store - - -def normalize_store_arg( - store: Any, storage_options=None, mode="r", *, zarr_version=None -) -> BaseStore: - if zarr_version is None: - # default to v2 store for backward compatibility - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - if zarr_version == 2: - normalize_store = _normalize_store_arg_v2 - elif zarr_version == 3: - from zarr._storage.v3 import _normalize_store_arg_v3 - - normalize_store = _normalize_store_arg_v3 - else: - raise ValueError("zarr_version must be either 2 or 3") - return normalize_store(store, storage_options, mode) - - -def rmdir(store: StoreLike, path: Path = None): - """Remove all items under the given path. If `store` provides a `rmdir` method, - this will be called, otherwise will fall back to implementation via the - `Store` interface.""" - path = normalize_storage_path(path) - store_version = getattr(store, "_store_version", 2) - if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore - # pass through - store.rmdir(path) - else: - # slow version, delete one key at a time - if store_version == 2: - _rmdir_from_keys(store, path) - else: - _rmdir_from_keys_v3(store, path) # type: ignore - - -def rename(store: Store, src_path: Path, dst_path: Path): - """Rename all items under the given path. If `store` provides a `rename` method, - this will be called, otherwise will fall back to implementation via the - `Store` interface.""" - src_path = normalize_storage_path(src_path) - dst_path = normalize_storage_path(dst_path) - if hasattr(store, "rename"): - # pass through - store.rename(src_path, dst_path) - else: - # slow version, delete one key at a time - _rename_from_keys(store, src_path, dst_path) - - -def listdir(store: BaseStore, path: Path = None): - """Obtain a directory listing for the given path. 
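The string dispatch implemented by ``_normalize_store_arg_v2`` above can be exercised directly: fsspec-style URLs become an :class:`FSStore`, paths ending in ``.zip`` a :class:`ZipStore`, paths ending in ``.n5`` an :class:`N5Store`, and anything else a :class:`DirectoryStore`. A minimal sketch, assuming a zarr 2.x install (the file names are arbitrary, and ``example.zip`` is created on disk when the ZipStore is opened)::

    from zarr.storage import normalize_store_arg

    store = normalize_store_arg("example.zarr")
    print(type(store).__name__)    # DirectoryStore
    store = normalize_store_arg("example.zip", mode="w")
    print(type(store).__name__)    # ZipStore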
If `store` provides a `listdir` - method, this will be called, otherwise will fall back to implementation via the - `MutableMapping` interface.""" - path = normalize_storage_path(path) - if hasattr(store, "listdir"): - # pass through - return store.listdir(path) - else: - # slow version, iterate through all keys - warnings.warn( - f"Store {store} has no `listdir` method. From zarr 2.9 onwards " - "may want to inherit from `Store`.", - stacklevel=2, - ) - return _listdir_from_keys(store, path) - - -def _getsize(store: BaseStore, path: Path = None) -> int: - # compute from size of values - if path and path in store: - v = store[path] - size = buffer_size(v) - else: - path = "" if path is None else normalize_storage_path(path) - size = 0 - store_version = getattr(store, "_store_version", 2) - if store_version == 3: - if path == "": - # have to list the root folders without trailing / in this case - members = store.list_prefix(data_root.rstrip("/")) # type: ignore - members += store.list_prefix(meta_root.rstrip("/")) # type: ignore - else: - members = store.list_prefix(data_root + path) # type: ignore - members += store.list_prefix(meta_root + path) # type: ignore - # also include zarr.json? - # members += ['zarr.json'] - else: - members = listdir(store, path) - prefix = _path_to_prefix(path) - members = [prefix + k for k in members] - for k in members: - try: - v = store[k] - except KeyError: - pass - else: - try: - size += buffer_size(v) - except TypeError: - return -1 - return size - - -def getsize(store: BaseStore, path: Path = None) -> int: - """Compute size of stored items for a given path. If `store` provides a `getsize` - method, this will be called, otherwise will return -1.""" - if hasattr(store, "getsize"): - # pass through - path = normalize_storage_path(path) - return store.getsize(path) - elif isinstance(store, MutableMapping): - return _getsize(store, path) - else: - return -1 - - -def _require_parent_group( - path: Optional[str], - store: StoreLike, - chunk_store: Optional[StoreLike], - overwrite: bool, -): - # assume path is normalized - if path: - segments = path.split("/") - for i in range(len(segments)): - p = "/".join(segments[:i]) - if contains_array(store, p): - _init_group_metadata(store, path=p, chunk_store=chunk_store, overwrite=overwrite) - elif not contains_group(store, p): - _init_group_metadata(store, path=p, chunk_store=chunk_store) - - -def init_array( - store: StoreLike, - shape: Union[int, Tuple[int, ...]], - chunks: Union[bool, int, Tuple[int, ...]] = True, - dtype=None, - compressor="default", - fill_value=None, - order: str = "C", - overwrite: bool = False, - path: Optional[Path] = None, - chunk_store: Optional[StoreLike] = None, - filters=None, - object_codec=None, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - storage_transformers=(), -): - """Initialize an array store with the given configuration. Note that this is a low-level - function and there should be no need to call this directly from user code. - - Parameters - ---------- - store : Store - A mapping that supports string keys and bytes-like values. - shape : int or tuple of ints - Array shape. - chunks : bool, int or tuple of ints, optional - Chunk shape. If True, will be guessed from `shape` and `dtype`. If - False, will be set to `shape`, i.e., single chunk for the whole array. - dtype : string or dtype, optional - NumPy dtype. - compressor : Codec, optional - Primary compressor. - fill_value : object - Default value to use for uninitialized portions of the array. 
- order : {'C', 'F'}, optional - Memory layout to be used within each chunk. - overwrite : bool, optional - If True, erase all data in `store` prior to initialisation. - path : string, bytes, optional - Path under which array is stored. - chunk_store : Store, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. - filters : sequence, optional - Sequence of filters to use to encode chunk data prior to compression. - object_codec : Codec, optional - A codec to encode object arrays, only needed if dtype=object. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - Examples - -------- - Initialize an array store:: - - >>> from zarr.storage import init_array, KVStore - >>> store = KVStore(dict()) - >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000)) - >>> sorted(store.keys()) - ['.zarray'] - - Array metadata is stored as JSON:: - - >>> print(store['.zarray'].decode()) - { - "chunks": [ - 1000, - 1000 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": ">> store = KVStore(dict()) - >>> init_array(store, shape=100000000, chunks=1000000, dtype='i1', path='foo') - >>> sorted(store.keys()) - ['.zgroup', 'foo/.zarray'] - >>> print(store['foo/.zarray'].decode()) - { - "chunks": [ - 1000000 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "|i1", - "fill_value": null, - "filters": null, - "order": "C", - "shape": [ - 100000000 - ], - "zarr_format": 2 - } - - Notes - ----- - The initialisation process involves normalising all array metadata, encoding - as JSON and storing under the '.zarray' key. 
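Re-initialising an occupied path is guarded: unless ``overwrite=True`` is passed, a second call raises :class:`ContainsArrayError` (or :class:`ContainsGroupError` when a group already occupies the path), as enforced by ``_init_array_metadata`` below. A minimal sketch, assuming zarr 2.x::

    from zarr.errors import ContainsArrayError
    from zarr.storage import KVStore, init_array

    store = KVStore(dict())
    init_array(store, shape=(100,), chunks=(10,))
    try:
        init_array(store, shape=(100,), chunks=(10,))
    except ContainsArrayError:
        print("path already holds an array; pass overwrite=True to replace it")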
- - """ - - # normalize path - path = normalize_storage_path(path) - - # ensure parent group initialized - store_version = getattr(store, "_store_version", 2) - if store_version < 3: - _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - - if store_version == 3 and "zarr.json" not in store: - # initialize with default zarr.json entry level metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore - - if not compressor: - # compatibility with legacy tests using compressor=[] - compressor = None - _init_array_metadata( - store, - shape=shape, - chunks=chunks, - dtype=dtype, - compressor=compressor, - fill_value=fill_value, - order=order, - overwrite=overwrite, - path=path, - chunk_store=chunk_store, - filters=filters, - object_codec=object_codec, - dimension_separator=dimension_separator, - storage_transformers=storage_transformers, - ) - - -def _init_array_metadata( - store: StoreLike, - shape, - chunks=None, - dtype=None, - compressor="default", - fill_value=None, - order="C", - overwrite=False, - path: Optional[str] = None, - chunk_store: Optional[StoreLike] = None, - filters=None, - object_codec=None, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - storage_transformers=(), -): - store_version = getattr(store, "_store_version", 2) - - path = normalize_storage_path(path) - - # guard conditions - if overwrite: - if store_version == 2: - # attempt to delete any pre-existing array in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - else: - group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) - array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = data_root + _path_to_prefix(path) - - # attempt to delete any pre-existing array in store - if array_meta_key in store: - store.erase(array_meta_key) # type: ignore - if group_meta_key in store: - store.erase(group_meta_key) # type: ignore - store.erase_prefix(data_prefix) # type: ignore - if chunk_store is not None: - chunk_store.erase_prefix(data_prefix) # type: ignore - - if "/" in path: - # path is a subfolder of an existing array, remove that array - parent_path = "/".join(path.split("/")[:-1]) - sfx = _get_metadata_suffix(store) # type: ignore - array_key = meta_root + parent_path + ".array" + sfx - if array_key in store: - store.erase(array_key) # type: ignore - - if not overwrite: - if contains_array(store, path): - raise ContainsArrayError(path) - elif contains_group(store, path, explicit_only=False): - raise ContainsGroupError(path) - elif store_version == 3: - if "/" in path: - # cannot create an array within an existing array path - parent_path = "/".join(path.split("/")[:-1]) - if contains_array(store, parent_path): - raise ContainsArrayError(path) - - # normalize metadata - dtype, object_codec = normalize_dtype(dtype, object_codec) - shape = normalize_shape(shape) + dtype.shape - dtype = dtype.base - chunks = normalize_chunks(chunks, shape, dtype.itemsize) - order = normalize_order(order) - fill_value = normalize_fill_value(fill_value, dtype) - - # optional array metadata - if dimension_separator is None and store_version == 2: - dimension_separator = getattr(store, "_dimension_separator", None) - dimension_separator = normalize_dimension_separator(dimension_separator) - - # compressor prep - if shape == (): - # no point in compressing a 0-dimensional array, only a single value - compressor = None - elif compressor == "none": - # compatibility - compressor = None 
- elif compressor == "default": - compressor = default_compressor - - # obtain compressor config - compressor_config = None - if compressor: - if store_version == 2: - try: - compressor_config = compressor.get_config() - except AttributeError as e: - raise BadCompressorError(compressor) from e - elif not isinstance(compressor, Codec): - raise ValueError("expected a numcodecs Codec for compressor") - # TODO: alternatively, could autoconvert str to a Codec - # e.g. 'zlib' -> numcodec.Zlib object - # compressor = numcodecs.get_codec({'id': compressor}) - - # obtain filters config - if filters: - # TODO: filters was removed from the metadata in v3 - # raise error here if store_version > 2? - filters_config = [f.get_config() for f in filters] - else: - filters_config = [] - - # deal with object encoding - if dtype.hasobject: - if object_codec is None: - if not filters: - # there are no filters so we can be sure there is no object codec - raise ValueError("missing object_codec for object array") - else: - # one of the filters may be an object codec, issue a warning rather - # than raise an error to maintain backwards-compatibility - warnings.warn( - "missing object_codec for object array; this will raise a " - "ValueError in version 3.0", - FutureWarning, - stacklevel=2, - ) - else: - filters_config.insert(0, object_codec.get_config()) - elif object_codec is not None: - warnings.warn( - "an object_codec is only needed for object arrays", - stacklevel=2, - ) - - # use null to indicate no filters - if not filters_config: - filters_config = None # type: ignore - - # initialize metadata - # TODO: don't store redundant dimension_separator for v3? - _compressor = compressor_config if store_version == 2 else compressor - meta = dict( - shape=shape, - compressor=_compressor, - fill_value=fill_value, - dimension_separator=dimension_separator, - ) - if store_version < 3: - meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) - assert not storage_transformers - else: - if dimension_separator is None: - dimension_separator = "/" - if filters_config: - attributes = {"filters": filters_config} - else: - attributes = {} - meta.update( - dict( - chunk_grid=dict(type="regular", chunk_shape=chunks, separator=dimension_separator), - chunk_memory_layout=order, - data_type=dtype, - attributes=attributes, - storage_transformers=storage_transformers, - ) - ) - - key = _prefix_to_array_key(store, _path_to_prefix(path)) - if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_array_metadata(meta) - else: - store[key] = encode_array_metadata(meta) - - -# backwards compatibility -init_store = init_array - - -def init_group( - store: StoreLike, - overwrite: bool = False, - path: Path = None, - chunk_store: Optional[StoreLike] = None, -): - """Initialize a group store. Note that this is a low-level function and there should be no - need to call this directly from user code. - - Parameters - ---------- - store : Store - A mapping that supports string keys and byte sequence values. - overwrite : bool, optional - If True, erase all data in `store` prior to initialisation. - path : string, optional - Path under which array is stored. - chunk_store : Store, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. 
- - """ - - # normalize path - path = normalize_storage_path(path) - - store_version = getattr(store, "_store_version", 2) - if store_version < 3: - # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - - if store_version == 3 and "zarr.json" not in store: - # initialize with default zarr.json entry level metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore - - # initialise metadata - _init_group_metadata(store=store, overwrite=overwrite, path=path, chunk_store=chunk_store) - - if store_version == 3: - # TODO: Should initializing a v3 group also create a corresponding - # empty folder under data/root/? I think probably not until there - # is actual data written there. - pass - - -def _init_group_metadata( - store: StoreLike, - overwrite: Optional[bool] = False, - path: Optional[str] = None, - chunk_store: Optional[StoreLike] = None, -): - store_version = getattr(store, "_store_version", 2) - path = normalize_storage_path(path) - - # guard conditions - if overwrite: - if store_version == 2: - # attempt to delete any pre-existing items in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - else: - group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) - array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = data_root + _path_to_prefix(path) - meta_prefix = meta_root + _path_to_prefix(path) - - # attempt to delete any pre-existing array in store - if array_meta_key in store: - store.erase(array_meta_key) # type: ignore - if group_meta_key in store: - store.erase(group_meta_key) # type: ignore - store.erase_prefix(data_prefix) # type: ignore - store.erase_prefix(meta_prefix) # type: ignore - if chunk_store is not None: - chunk_store.erase_prefix(data_prefix) # type: ignore - - if not overwrite: - if contains_array(store, path): - raise ContainsArrayError(path) - elif contains_group(store, path): - raise ContainsGroupError(path) - elif store_version == 3 and "/" in path: - # cannot create a group overlapping with an existing array name - parent_path = "/".join(path.split("/")[:-1]) - if contains_array(store, parent_path): - raise ContainsArrayError(path) - - # initialize metadata - # N.B., currently no metadata properties are needed, however there may - # be in future - if store_version == 3: - meta = {"attributes": {}} # type: ignore - else: - meta = {} - key = _prefix_to_group_key(store, _path_to_prefix(path)) - if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_group_metadata(meta) - else: - store[key] = encode_group_metadata(meta) - - -def _dict_store_keys(d: Dict, prefix="", cls=dict): - for k in d.keys(): - v = d[k] - if isinstance(v, cls): - yield from _dict_store_keys(v, prefix + k + "/", cls) - else: - yield prefix + k - - -class KVStore(Store): - """ - This provides a default implementation of a store interface around - a mutable mapping, to avoid having to test stores for presence of methods. 
- - This, for most methods should just be a pass-through to the underlying KV - store which is likely to expose a MuttableMapping interface, - """ - - def __init__(self, mutablemapping): - self._mutable_mapping = mutablemapping - - def __getitem__(self, key): - return self._mutable_mapping[key] - - def __setitem__(self, key, value): - self._mutable_mapping[key] = value - - def __delitem__(self, key): - del self._mutable_mapping[key] - - def __contains__(self, key): - return key in self._mutable_mapping - - def get(self, key, default=None): - return self._mutable_mapping.get(key, default) - - def values(self): - return self._mutable_mapping.values() - - def __iter__(self): - return iter(self._mutable_mapping) - - def __len__(self): - return len(self._mutable_mapping) - - def __repr__(self): - return f"<{self.__class__.__name__}: \n{self._mutable_mapping!r}\n at {id(self):#x}>" - - def __eq__(self, other): - if isinstance(other, KVStore): - return self._mutable_mapping == other._mutable_mapping - else: - return NotImplemented - - -class MemoryStore(Store): - """Store class that uses a hierarchy of :class:`KVStore` objects, thus all data - will be held in main memory. - - Examples - -------- - This is the default class used when creating a group. E.g.:: - - >>> import zarr - >>> g = zarr.group() - >>> type(g.store) - - - Note that the default class when creating an array is the built-in - :class:`KVStore` class, i.e.:: - - >>> z = zarr.zeros(100) - >>> type(z.store) - - - Notes - ----- - Safe to write in multiple threads. - - """ - - def __init__(self, root=None, cls=dict, dimension_separator=None): - if root is None: - self.root = cls() - else: - self.root = root - self.cls = cls - self.write_mutex = Lock() - self._dimension_separator = dimension_separator - - def __getstate__(self): - return self.root, self.cls - - def __setstate__(self, state): - root, cls = state - self.__init__(root=root, cls=cls) - - def _get_parent(self, item: str): - parent = self.root - # split the item - segments = item.split("/") - # find the parent container - for k in segments[:-1]: - parent = parent[k] - if not isinstance(parent, self.cls): - raise KeyError(item) - return parent, segments[-1] - - def _require_parent(self, item): - parent = self.root - # split the item - segments = item.split("/") - # require the parent container - for k in segments[:-1]: - try: - parent = parent[k] - except KeyError: - parent[k] = self.cls() - parent = parent[k] - else: - if not isinstance(parent, self.cls): - raise KeyError(item) - return parent, segments[-1] - - def __getitem__(self, item: str): - parent, key = self._get_parent(item) - try: - value = parent[key] - except KeyError as e: - raise KeyError(item) from e - else: - if isinstance(value, self.cls): - raise KeyError(item) - else: - return value - - def __setitem__(self, item: str, value): - with self.write_mutex: - parent, key = self._require_parent(item) - value = ensure_bytes(value) - parent[key] = value - - def __delitem__(self, item: str): - with self.write_mutex: - parent, key = self._get_parent(item) - try: - del parent[key] - except KeyError as e: - raise KeyError(item) from e - - def __contains__(self, item: str): # type: ignore[override] - try: - parent, key = self._get_parent(item) - value = parent[key] - except KeyError: - return False - else: - return not isinstance(value, self.cls) - - def __eq__(self, other): - return isinstance(other, MemoryStore) and self.root == other.root and self.cls == other.cls - - def keys(self): - yield from 
_dict_store_keys(self.root, cls=self.cls) - - def __iter__(self): - return self.keys() - - def __len__(self) -> int: - return sum(1 for _ in self.keys()) - - def listdir(self, path: Path = None) -> List[str]: - path = normalize_storage_path(path) - if path: - try: - parent, key = self._get_parent(path) - value = parent[key] - except KeyError: - return [] - else: - value = self.root - if isinstance(value, self.cls): - return sorted(value.keys()) - else: - return [] - - def rename(self, src_path: Path, dst_path: Path): - src_path = normalize_storage_path(src_path) - dst_path = normalize_storage_path(dst_path) - - src_parent, src_key = self._get_parent(src_path) - dst_parent, dst_key = self._require_parent(dst_path) - - dst_parent[dst_key] = src_parent.pop(src_key) - - def rmdir(self, path: Path = None): - path = normalize_storage_path(path) - if path: - try: - parent, key = self._get_parent(path) - value = parent[key] - except KeyError: - return - else: - if isinstance(value, self.cls): - del parent[key] - else: - # clear out root - self.root = self.cls() - - def getsize(self, path: Path = None): - path = normalize_storage_path(path) - - # obtain value to return size of - value = None - if path: - try: - parent, key = self._get_parent(path) - value = parent[key] - except KeyError: - pass - else: - value = self.root - - # obtain size of value - if value is None: - return 0 - - elif isinstance(value, self.cls): - # total size for directory - size = 0 - for v in value.values(): - if not isinstance(v, self.cls): - size += buffer_size(v) - return size - - else: - return buffer_size(value) - - def clear(self): - with self.write_mutex: - self.root.clear() - - -class DictStore(MemoryStore): - def __init__(self, *args, **kwargs): - warnings.warn( - "DictStore has been renamed to MemoryStore in 2.4.0 and " - "will be removed in the future. Please use MemoryStore.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - -class DirectoryStore(Store): - """Storage class using directories and files on a standard file system. - - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.DirectoryStore('data/array.zarr') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Each chunk of the array is stored as a separate file on the file system, - i.e.:: - - >>> import os - >>> sorted(os.listdir('data/array.zarr')) - ['.zarray', '0.0', '0.1', '1.0', '1.1'] - - Store a group:: - - >>> store = zarr.DirectoryStore('data/group.zarr') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] 
= 42 - - When storing a group, levels in the group hierarchy will correspond to - directories on the file system, i.e.:: - - >>> sorted(os.listdir('data/group.zarr')) - ['.zgroup', 'foo'] - >>> sorted(os.listdir('data/group.zarr/foo')) - ['.zgroup', 'bar'] - >>> sorted(os.listdir('data/group.zarr/foo/bar')) - ['.zarray', '0.0', '0.1', '1.0', '1.1'] - - Notes - ----- - Atomic writes are used, which means that data are first written to a - temporary file, then moved into place when the write is successfully - completed. Files are only held open while they are being read or written and are - closed immediately afterwards, so there is no need to manually close any files. - - Safe to write in multiple threads or processes. - - """ - - def __init__( - self, path, normalize_keys=False, dimension_separator: Optional[DIMENSION_SEPARATOR] = None - ): - # guard conditions - path = os.path.abspath(path) - if os.path.exists(path) and not os.path.isdir(path): - raise FSPathExistNotDir(path) - - self.path = path - self.normalize_keys = normalize_keys - self._dimension_separator = dimension_separator - - def _normalize_key(self, key): - return key.lower() if self.normalize_keys else key - - @staticmethod - def _fromfile(fn): - """Read data from a file - - Parameters - ---------- - fn : str - Filepath to open and read from. - - Notes - ----- - Subclasses should overload this method to specify any custom - file reading logic. - """ - with open(fn, "rb") as f: - return f.read() - - @staticmethod - def _tofile(a, fn): - """Write data to a file - - Parameters - ---------- - a : array-like - Data to write into the file. - fn : str - Filepath to open and write to. - - Notes - ----- - Subclasses should overload this method to specify any custom - file writing logic. - """ - with open(fn, mode="wb") as f: - f.write(a) - - def __getitem__(self, key): - key = self._normalize_key(key) - filepath = os.path.join(self.path, key) - if os.path.isfile(filepath): - return self._fromfile(filepath) - else: - raise KeyError(key) - - def __setitem__(self, key, value): - key = self._normalize_key(key) - - # coerce to flat, contiguous array (ideally without copying) - value = ensure_contiguous_ndarray_like(value) - - # destination path for key - file_path = os.path.join(self.path, key) - - # ensure there is no directory in the way - if os.path.isdir(file_path): - shutil.rmtree(file_path) - - # ensure containing directory exists - dir_path, file_name = os.path.split(file_path) - if os.path.isfile(dir_path): - raise KeyError(key) - if not os.path.exists(dir_path): - try: - os.makedirs(dir_path) - except OSError as e: - if e.errno != errno.EEXIST: - raise KeyError(key) from e - - # write to temporary file - # note we're not using tempfile.NamedTemporaryFile to avoid restrictive file permissions - temp_name = file_name + "." 
+ uuid.uuid4().hex + ".partial" - temp_path = os.path.join(dir_path, temp_name) - try: - self._tofile(value, temp_path) - - # move temporary file into place; - # make several attempts at writing the temporary file to get past - # potential antivirus file locking issues - retry_call(os.replace, (temp_path, file_path), exceptions=(PermissionError,)) - - finally: - # clean up if temp file still exists for whatever reason - if os.path.exists(temp_path): # pragma: no cover - os.remove(temp_path) - - def __delitem__(self, key): - key = self._normalize_key(key) - path = os.path.join(self.path, key) - if os.path.isfile(path): - os.remove(path) - elif os.path.isdir(path): - # include support for deleting directories, even though strictly - # speaking these do not exist as keys in the store - shutil.rmtree(path) - else: - raise KeyError(key) - - def __contains__(self, key): - key = self._normalize_key(key) - file_path = os.path.join(self.path, key) - return os.path.isfile(file_path) - - def __eq__(self, other): - return isinstance(other, DirectoryStore) and self.path == other.path - - def keys(self): - if os.path.exists(self.path): - yield from self._keys_fast(self.path) - - @staticmethod - def _keys_fast(path, walker=os.walk): - for dirpath, _, filenames in walker(path): - dirpath = os.path.relpath(dirpath, path) - if dirpath == os.curdir: - for f in filenames: - yield f - else: - dirpath = dirpath.replace("\\", "/") - for f in filenames: - yield "/".join((dirpath, f)) - - def __iter__(self): - return self.keys() - - def __len__(self): - return sum(1 for _ in self.keys()) - - def dir_path(self, path=None): - store_path = normalize_storage_path(path) - dir_path = self.path - if store_path: - dir_path = os.path.join(dir_path, store_path) - return dir_path - - def listdir(self, path=None): - return ( - self._nested_listdir(path) - if self._dimension_separator == "/" - else self._flat_listdir(path) - ) - - def _flat_listdir(self, path=None): - dir_path = self.dir_path(path) - if os.path.isdir(dir_path): - return sorted(os.listdir(dir_path)) - else: - return [] - - def _nested_listdir(self, path=None): - children = self._flat_listdir(path=path) - if array_meta_key in children: - # special handling of directories containing an array to map nested chunk - # keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append( - rel_path.replace(os.path.sep, self._dimension_separator or ".") - ) - else: - new_children.append(entry) - return sorted(new_children) - else: - return children - - def rename(self, src_path, dst_path): - store_src_path = normalize_storage_path(src_path) - store_dst_path = normalize_storage_path(dst_path) - - dir_path = self.path - - src_path = os.path.join(dir_path, store_src_path) - dst_path = os.path.join(dir_path, store_dst_path) - - os.renames(src_path, dst_path) - - def rmdir(self, path=None): - store_path = normalize_storage_path(path) - dir_path = self.path - if store_path: - dir_path = os.path.join(dir_path, store_path) - if os.path.isdir(dir_path): - shutil.rmtree(dir_path) - - def getsize(self, path=None): - store_path = normalize_storage_path(path) - fs_path = self.path - if store_path: - fs_path = 
os.path.join(fs_path, store_path) - if os.path.isfile(fs_path): - return os.path.getsize(fs_path) - elif os.path.isdir(fs_path): - size = 0 - for child in scandir(fs_path): - if child.is_file(): - size += child.stat().st_size - return size - else: - return 0 - - def clear(self): - shutil.rmtree(self.path) - - -def atexit_rmtree(path, isdir=os.path.isdir, rmtree=shutil.rmtree): # pragma: no cover - """Ensure directory removal at interpreter exit.""" - if isdir(path): - rmtree(path) - - -# noinspection PyShadowingNames -def atexit_rmglob( - path, - glob=glob.glob, - isdir=os.path.isdir, - isfile=os.path.isfile, - remove=os.remove, - rmtree=shutil.rmtree, -): # pragma: no cover - """Ensure removal of multiple files at interpreter exit.""" - for p in glob(path): - if isfile(p): - remove(p) - elif isdir(p): - rmtree(p) - - -class FSStore(Store): - """Wraps an fsspec.FSMap to give access to arbitrary filesystems - - Requires that ``fsspec`` is installed, as well as any additional - requirements for the protocol chosen. - - Parameters - ---------- - url : str - The destination to map. If no fs is provided, should include protocol - and path, like "s3://bucket/root". If an fs is provided, can be a path - within that filesystem, like "bucket/root" - normalize_keys : bool - key_separator : str - public API for accessing dimension_separator. Never `None` - See dimension_separator for more information. - mode : str - "w" for writable, "r" for read-only - exceptions : list of Exception subclasses - When accessing data, any of these exceptions will be treated - as a missing key - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - fs : fsspec.spec.AbstractFileSystem, optional - An existing filesystem to use for the store. - check : bool, optional - If True, performs a touch at the root location, to check for write access. - Passed to `fsspec.mapping.FSMap` constructor. - create : bool, optional - If True, performs a mkdir at the rool location. - Passed to `fsspec.mapping.FSMap` constructor. - missing_exceptions : sequence of Exceptions, optional - Exceptions classes to associate with missing files. - Passed to `fsspec.mapping.FSMap` constructor. - storage_options : passed to the fsspec implementation. Cannot be used - together with fs. - """ - - _array_meta_key = array_meta_key - _group_meta_key = group_meta_key - _attrs_key = attrs_key - - def __init__( - self, - url, - normalize_keys=False, - key_separator=None, - mode="w", - exceptions=(KeyError, PermissionError, IOError), - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - fs=None, - check=False, - create=False, - missing_exceptions=None, - **storage_options, - ): - if not self._fsspec_installed(): # pragma: no cover - raise ImportError("`fsspec` is required to use zarr's FSStore") - import fsspec - - mapper_options = {"check": check, "create": create} - # https://github.com/zarr-developers/zarr-python/pull/911#discussion_r841926292 - # Some fsspec implementations don't accept missing_exceptions. - # This is a workaround to avoid passing it in the most common scenarios. - # Remove this and add missing_exceptions to mapper_options when fsspec is released. 
- if missing_exceptions is not None: - mapper_options["missing_exceptions"] = missing_exceptions # pragma: no cover - - if fs is None: - protocol, _ = fsspec.core.split_protocol(url) - # set auto_mkdir to True for local file system - if protocol in (None, "file") and not storage_options.get("auto_mkdir"): - storage_options["auto_mkdir"] = True - self.map = fsspec.get_mapper(url, **{**mapper_options, **storage_options}) - self.fs = self.map.fs # for direct operations - self.path = self.fs._strip_protocol(url) - else: - if storage_options: - raise ValueError("Cannot specify both fs and storage_options") - self.fs = fs - self.path = self.fs._strip_protocol(url) - self.map = self.fs.get_mapper(self.path, **mapper_options) - - self.normalize_keys = normalize_keys - self.mode = mode - self.exceptions = exceptions - # For backwards compatibility. Guaranteed to be non-None - if key_separator is not None: - dimension_separator = key_separator - - self.key_separator = dimension_separator - self._default_key_separator() - - # Pass attributes to array creation - self._dimension_separator = dimension_separator - - def _default_key_separator(self): - if self.key_separator is None: - self.key_separator = "." - - def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace(".", self.key_separator) - key = "/".join(bits + [end]) - - return key.lower() if self.normalize_keys else key - - def getitems( - self, keys: Sequence[str], *, contexts: Mapping[str, Context] - ) -> Mapping[str, Any]: - keys_transformed = {self._normalize_key(key): key for key in keys} - results_transformed = self.map.getitems(list(keys_transformed), on_error="return") - results = {} - for k, v in results_transformed.items(): - if isinstance(v, self.exceptions): - # Cause recognized exceptions to prompt a KeyError in the - # function calling this method - continue - elif isinstance(v, Exception): - # Raise any other exception - raise v - else: - # The function calling this method may not recognize the transformed - # keys, so we send the values returned by self.map.getitems back into - # the original key space. 
- results[keys_transformed[k]] = v - return results - - def __getitem__(self, key): - key = self._normalize_key(key) - try: - return self.map[key] - except self.exceptions as e: - raise KeyError(key) from e - - def setitems(self, values): - if self.mode == "r": - raise ReadOnlyError() - - # Normalize keys and make sure the values are bytes - values = { - self._normalize_key(key): ensure_contiguous_ndarray_or_bytes(val) - for key, val in values.items() - } - self.map.setitems(values) - - def __setitem__(self, key, value): - if self.mode == "r": - raise ReadOnlyError() - key = self._normalize_key(key) - value = ensure_contiguous_ndarray_or_bytes(value) - path = self.dir_path(key) - try: - if self.fs.isdir(path): - self.fs.rm(path, recursive=True) - self.map[key] = value - self.fs.invalidate_cache(self.fs._parent(path)) - except self.exceptions as e: - raise KeyError(key) from e - - def __delitem__(self, key): - if self.mode == "r": - raise ReadOnlyError() - key = self._normalize_key(key) - path = self.dir_path(key) - if self.fs.isdir(path): - self.fs.rm(path, recursive=True) - else: - del self.map[key] - - def delitems(self, keys): - if self.mode == "r": - raise ReadOnlyError() - # only remove the keys that exist in the store - nkeys = [self._normalize_key(key) for key in keys if key in self] - # rm errors if you pass an empty collection - if len(nkeys) > 0: - self.map.delitems(nkeys) - - def __contains__(self, key): - key = self._normalize_key(key) - return key in self.map - - def __eq__(self, other): - return type(self) is type(other) and self.map == other.map and self.mode == other.mode - - def keys(self): - return iter(self.map) - - def __iter__(self): - return self.keys() - - def __len__(self): - return len(list(self.keys())) - - def dir_path(self, path=None): - store_path = normalize_storage_path(path) - return self.map._key_to_str(store_path) - - def listdir(self, path=None): - dir_path = self.dir_path(path) - try: - children = sorted( - p.rstrip("/").rsplit("/", 1)[-1] for p in self.fs.ls(dir_path, detail=False) - ) - if self.key_separator != "/": - return children - else: - if self._array_meta_key in children: - # special handling of directories containing an array to map nested chunk - # keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for file_name in self.fs.find(entry_path): - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path)[1] - rel_path = rel_path.lstrip("/") - new_children.append(rel_path.replace("/", ".")) - else: - new_children.append(entry) - return sorted(new_children) - else: - return children - except OSError: - return [] - - def rmdir(self, path=None): - if self.mode == "r": - raise ReadOnlyError() - store_path = self.dir_path(path) - if self.fs.isdir(store_path): - self.fs.rm(store_path, recursive=True) - - def getsize(self, path=None): - store_path = self.dir_path(path) - return self.fs.du(store_path, True, True) - - def clear(self): - if self.mode == "r": - raise ReadOnlyError() - self.map.clear() - - @classmethod - @lru_cache(maxsize=None) - def _fsspec_installed(cls): - """Returns true if fsspec is installed""" - import importlib.util - - return importlib.util.find_spec("fsspec") is not None - - -class TempStore(DirectoryStore): - """Directory store using a temporary directory for storage. 
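A minimal usage sketch, assuming zarr 2.x (the ``prefix`` value is arbitrary): the store behaves like a :class:`DirectoryStore` rooted at a freshly created temporary directory that is removed at interpreter exit::

    import zarr
    from zarr.storage import TempStore

    store = TempStore(prefix="zarr-demo-")
    z = zarr.zeros((10, 10), chunks=(5, 5), store=store)
    z[:] = 7
    print(store.path)   # e.g. /tmp/zarr-demo-xxxxxxxx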
- - Parameters - ---------- - suffix : string, optional - Suffix for the temporary directory name. - prefix : string, optional - Prefix for the temporary directory name. - dir : string, optional - Path to parent directory in which to create temporary directory. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - """ - - # noinspection PyShadowingBuiltins - def __init__( - self, - suffix="", - prefix="zarr", - dir=None, - normalize_keys=False, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - ): - path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) - atexit.register(atexit_rmtree, path) - super().__init__(path, normalize_keys=normalize_keys) - - -_prog_ckey = re.compile(r"^(\d+)(\.\d+)+$") -_prog_number = re.compile(r"^\d+$") - - -class NestedDirectoryStore(DirectoryStore): - """Storage class using directories and files on a standard file system, with - special handling for chunk keys so that chunk files for multidimensional - arrays are stored in a nested directory tree. - - .. deprecated:: 2.18.0 - NestedDirectoryStore will be removed in Zarr-Python 3.0 where controlling - the chunk key encoding will be supported as part of the array metadata. See - `GH1274 `_ - for more information. - - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - dimension_separator : {'/'}, optional - Separator placed between the dimensions of a chunk. - Only supports "/" unlike other implementations. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.NestedDirectoryStore('data/array.zarr') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Each chunk of the array is stored as a separate file on the file system, - note the multiple directory levels used for the chunk files:: - - >>> import os - >>> sorted(os.listdir('data/array.zarr')) - ['.zarray', '0', '1'] - >>> sorted(os.listdir('data/array.zarr/0')) - ['0', '1'] - >>> sorted(os.listdir('data/array.zarr/1')) - ['0', '1'] - - Store a group:: - - >>> store = zarr.NestedDirectoryStore('data/group.zarr') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - When storing a group, levels in the group hierarchy will correspond to - directories on the file system, i.e.:: - - >>> sorted(os.listdir('data/group.zarr')) - ['.zgroup', 'foo'] - >>> sorted(os.listdir('data/group.zarr/foo')) - ['.zgroup', 'bar'] - >>> sorted(os.listdir('data/group.zarr/foo/bar')) - ['.zarray', '0', '1'] - >>> sorted(os.listdir('data/group.zarr/foo/bar/0')) - ['0', '1'] - >>> sorted(os.listdir('data/group.zarr/foo/bar/1')) - ['0', '1'] - - Notes - ----- - The :class:`DirectoryStore` class stores all chunk files for an array - together in a single directory. 
On some file systems, the potentially large - number of files in a single directory can cause performance issues. The - :class:`NestedDirectoryStore` class provides an alternative where chunk - files for multidimensional arrays will be organised into a directory - hierarchy, thus reducing the number of files in any one directory. - - Safe to write in multiple threads or processes. - - """ - - def __init__( - self, path, normalize_keys=False, dimension_separator: Optional[DIMENSION_SEPARATOR] = "/" - ): - - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - super().__init__(path, normalize_keys=normalize_keys) - if dimension_separator is None: - dimension_separator = "/" - elif dimension_separator != "/": - raise ValueError("NestedDirectoryStore only supports '/' as dimension_separator") - self._dimension_separator = dimension_separator - - def __eq__(self, other): - return isinstance(other, NestedDirectoryStore) and self.path == other.path - - -# noinspection PyPep8Naming -class ZipStore(Store): - """Storage class using a Zip file. - - Parameters - ---------- - path : string - Location of file. - compression : integer, optional - Compression method to use when writing to the archive. - allowZip64 : bool, optional - If True (the default) will create ZIP files that use the ZIP64 - extensions when the zipfile is larger than 2 GiB. If False - will raise an exception when the ZIP file would require ZIP64 - extensions. - mode : string, optional - One of 'r' to read an existing file, 'w' to truncate and write a new - file, 'a' to append to an existing file, or 'x' to exclusively create - and write a new file. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.ZipStore('data/array.zip', mode='w') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.ZipStore('data/group.zip', mode='w') - >>> root = zarr.group(store=store) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - - After modifying a ZipStore, the ``close()`` method must be called, otherwise - essential data will not be written to the underlying Zip file. The ZipStore - class also supports the context manager protocol, which ensures the ``close()`` - method is called on leaving the context, e.g.:: - - >>> with zarr.ZipStore('data/array.zip', mode='w') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store) - ... z[...] = 42 - ... # no need to call store.close() - - Notes - ----- - Each chunk of an array is stored as a separate entry in the Zip file. Note - that Zip files do not provide any way to remove or replace existing entries. - If an attempt is made to replace an entry, then a warning is generated by - the Python standard library about a duplicate Zip file entry. This can be - triggered if you attempt to write data to a Zarr array more than once, - e.g.:: - - >>> store = zarr.ZipStore('data/example.zip', mode='w') - >>> z = zarr.zeros(100, chunks=10, store=store) - >>> # first write OK - ... z[...] = 42 - >>> # second write generates warnings - ... z[...] 
= 42 # doctest: +SKIP - >>> store.close() - - This can also happen in a more subtle situation, where data are written only - once to a Zarr array, but the write operations are not aligned with chunk - boundaries, e.g.:: - - >>> store = zarr.ZipStore('data/example.zip', mode='w') - >>> z = zarr.zeros(100, chunks=10, store=store) - >>> z[5:15] = 42 - >>> # write overlaps chunk previously written, generates warnings - ... z[15:25] = 42 # doctest: +SKIP - - To avoid creating duplicate entries, only write data once, and align writes - with chunk boundaries. This alignment is done automatically if you call - ``z[...] = ...`` or create an array from existing data via :func:`zarr.array`. - - Alternatively, use a :class:`DirectoryStore` when writing the data, then - manually Zip the directory and use the Zip file for subsequent reads. - Take note that the files in the Zip file must be relative to the root of the - Zarr archive. You may find it easier to create such a Zip file with ``7z``, e.g.:: - - 7z a -tzip archive.zarr.zip archive.zarr/. - - Safe to write in multiple threads but not in multiple processes. - - """ - - _erasable = False - - def __init__( - self, - path, - compression=zipfile.ZIP_STORED, - allowZip64=True, - mode="a", - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - ): - # store properties - path = os.path.abspath(path) - self.path = path - self.compression = compression - self.allowZip64 = allowZip64 - self.mode = mode - self._dimension_separator = dimension_separator - - # Current understanding is that zipfile module in stdlib is not thread-safe, - # and so locking is required for both read and write. However, this has not - # been investigated in detail, perhaps no lock is needed if mode='r'. - self.mutex = RLock() - - # open zip file - self.zf = zipfile.ZipFile(path, mode=mode, compression=compression, allowZip64=allowZip64) - - def __getstate__(self): - self.flush() - return self.path, self.compression, self.allowZip64, self.mode - - def __setstate__(self, state): - path, compression, allowZip64, mode = state - # if initially opened with mode 'w' or 'x', re-open in mode 'a' so file doesn't - # get clobbered - if mode in "wx": - mode = "a" - self.__init__(path=path, compression=compression, allowZip64=allowZip64, mode=mode) - - def close(self): - """Closes the underlying zip file, ensuring all records are written.""" - with self.mutex: - self.zf.close() - - def flush(self): - """Closes the underlying zip file, ensuring all records are written, - then re-opens the file for further modifications.""" - if self.mode != "r": - with self.mutex: - self.zf.close() - # N.B., re-open with mode 'a' regardless of initial mode so we don't wipe - # what's been written - self.zf = zipfile.ZipFile( - self.path, mode="a", compression=self.compression, allowZip64=self.allowZip64 - ) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def __getitem__(self, key): - with self.mutex: - with self.zf.open(key) as f: # will raise KeyError - return f.read() - - def __setitem__(self, key, value): - if self.mode == "r": - raise ReadOnlyError() - value = ensure_contiguous_ndarray_like(value).view("u1") - with self.mutex: - # writestr(key, value) writes with default permissions from - # zipfile (600) that are too restrictive, build ZipInfo for - # the key to work around limitation - keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) - keyinfo.compress_type = self.compression - if keyinfo.filename[-1] == os.sep: - 
keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x - keyinfo.external_attr |= 0x10 # MS-DOS directory flag - else: - keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- - - self.zf.writestr(keyinfo, value) - - def __delitem__(self, key): - raise NotImplementedError - - def __eq__(self, other): - return ( - isinstance(other, ZipStore) - and self.path == other.path - and self.compression == other.compression - and self.allowZip64 == other.allowZip64 - ) - - def keylist(self): - with self.mutex: - return sorted(self.zf.namelist()) - - def keys(self): - yield from self.keylist() - - def __iter__(self): - return self.keys() - - def __len__(self): - return sum(1 for _ in self.keys()) - - def __contains__(self, key): - try: - with self.mutex: - self.zf.getinfo(key) - except KeyError: - return False - else: - return True - - def listdir(self, path=None): - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) - - def getsize(self, path=None): - path = normalize_storage_path(path) - with self.mutex: - children = self.listdir(path) - if children: - size = 0 - for child in children: - if path: - name = path + "/" + child - else: - name = child - try: - info = self.zf.getinfo(name) - except KeyError: - pass - else: - size += info.compress_size - return size - elif path: - try: - info = self.zf.getinfo(path) - return info.compress_size - except KeyError: - return 0 - else: - return 0 - - def clear(self): - if self.mode == "r": - raise ReadOnlyError() - with self.mutex: - self.close() - os.remove(self.path) - self.zf = zipfile.ZipFile( - self.path, mode=self.mode, compression=self.compression, allowZip64=self.allowZip64 - ) - - -def migrate_1to2(store): - """Migrate array metadata in `store` from Zarr format version 1 to - version 2. - - Parameters - ---------- - store : Store - Store to be migrated. - - Notes - ----- - Version 1 did not support hierarchies, so this migration function will - look for a single array in `store` and migrate the array metadata to - version 2. - - """ - - # migrate metadata - from zarr import meta_v1 - - meta = meta_v1.decode_metadata(store["meta"]) - del store["meta"] - - # add empty filters - meta["filters"] = None - - # migration compression metadata - compression = meta["compression"] - if compression is None or compression == "none": - compressor_config = None - else: - compression_opts = meta["compression_opts"] - codec_cls = codec_registry[compression] - if isinstance(compression_opts, dict): - compressor = codec_cls(**compression_opts) - else: - compressor = codec_cls(compression_opts) - compressor_config = compressor.get_config() - meta["compressor"] = compressor_config - del meta["compression"] - del meta["compression_opts"] - - # store migrated metadata - if hasattr(store, "_metadata_class"): - store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) - else: - store[array_meta_key] = encode_array_metadata(meta) - - # migrate user attributes - store[attrs_key] = store["attrs"] - del store["attrs"] - - -# noinspection PyShadowingBuiltins -class DBMStore(Store): - """Storage class using a DBM-style database. - - .. deprecated:: 2.18.0 - DBMStore will be removed in Zarr-Python 3.0. See - `GH1274 `_ - for more information. - - Parameters - ---------- - path : string - Location of database file. - flag : string, optional - Flags for opening the database file. - mode : int - File mode used if a new file is created. - open : function, optional - Function to open the database file. 
If not provided, :func:`dbm.open` will be - used on Python 3, and :func:`anydbm.open` will be used on Python 2. - write_lock: bool, optional - Use a lock to prevent concurrent writes from multiple threads (True by default). - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk.e - **open_kwargs - Keyword arguments to pass the `open` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.DBMStore('data/array.db') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.DBMStore('data/group.db') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - - After modifying a DBMStore, the ``close()`` method must be called, otherwise - essential data may not be written to the underlying database file. The - DBMStore class also supports the context manager protocol, which ensures the - ``close()`` method is called on leaving the context, e.g.:: - - >>> with zarr.DBMStore('data/array.db') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - ... z[...] = 42 - ... # no need to call store.close() - - A different database library can be used by passing a different function to - the `open` parameter. For example, if the `bsddb3 - `_ package is installed, a - Berkeley DB database can be used:: - - >>> import bsddb3 - >>> store = zarr.DBMStore('data/array.bdb', open=bsddb3.btopen) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() - - Notes - ----- - Please note that, by default, this class will use the Python standard - library `dbm.open` function to open the database file (or `anydbm.open` on - Python 2). There are up to three different implementations of DBM-style - databases available in any Python installation, and which one is used may - vary from one system to another. Database file formats are not compatible - between these different implementations. Also, some implementations are - more efficient than others. In particular, the "dumb" implementation will be - the fall-back on many systems, and has very poor performance for some usage - scenarios. If you want to ensure a specific implementation is used, pass the - corresponding open function, e.g., `dbm.gnu.open` to use the GNU DBM - library. - - Safe to write in multiple threads. May be safe to write in multiple processes, - depending on which DBM implementation is being used, although this has not been - tested. - - """ - - def __init__( - self, - path, - flag="c", - mode=0o666, - open=None, - write_lock=True, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - **open_kwargs, - ): - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - if open is None: - import dbm - - open = dbm.open - path = os.path.abspath(path) - # noinspection PyArgumentList - self.db = open(path, flag, mode, **open_kwargs) - self.path = path - self.flag = flag - self.mode = mode - self.open = open - self.write_lock = write_lock - self.write_mutex: Union[Lock, NoLock] - if write_lock: - # This may not be required as some dbm implementations manage their own - # locks, but err on the side of caution. 
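The Notes above recommend pinning a specific DBM implementation by passing its open function rather than relying on whatever ``dbm.open`` selects. A minimal sketch of that pattern, assuming the GNU DBM module (``dbm.gnu``) is available on the platform and using an illustrative file path::

    import dbm.gnu  # availability varies by platform

    import zarr

    # force the GNU DBM implementation instead of the dbm.open fallback chain
    store = zarr.DBMStore('data/array.gdbm', open=dbm.gnu.open)
    z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
    z[...] = 42
    store.close()  # flush and close the underlying database file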
- self.write_mutex = Lock() - else: - self.write_mutex = nolock - self.open_kwargs = open_kwargs - self._dimension_separator = dimension_separator - - def __getstate__(self): - try: - self.flush() # needed for ndbm - except Exception: - # flush may fail if db has already been closed - pass - return (self.path, self.flag, self.mode, self.open, self.write_lock, self.open_kwargs) - - def __setstate__(self, state): - path, flag, mode, open, write_lock, open_kws = state - if flag[0] == "n": - flag = "c" + flag[1:] # don't clobber an existing database - self.__init__(path=path, flag=flag, mode=mode, open=open, write_lock=write_lock, **open_kws) - - def close(self): - """Closes the underlying database file.""" - if hasattr(self.db, "close"): - with self.write_mutex: - self.db.close() - - def flush(self): - """Synchronizes data to the underlying database file.""" - if self.flag[0] != "r": - with self.write_mutex: - if hasattr(self.db, "sync"): - self.db.sync() - else: # pragma: no cover - # we don't cover this branch anymore as ndbm (oracle) is not packaged - # by conda-forge on non-mac OS: - # https://github.com/conda-forge/staged-recipes/issues/4476 - # fall-back, close and re-open, needed for ndbm - flag = self.flag - if flag[0] == "n": - flag = "c" + flag[1:] # don't clobber an existing database - self.db.close() - # noinspection PyArgumentList - self.db = self.open(self.path, flag, self.mode, **self.open_kwargs) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def __getitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - return self.db[key] - - def __setitem__(self, key, value): - if isinstance(key, str): - key = key.encode("ascii") - value = ensure_bytes(value) - with self.write_mutex: - self.db[key] = value - - def __delitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.write_mutex: - del self.db[key] - - def __eq__(self, other): - return ( - isinstance(other, DBMStore) - and self.path == other.path - and - # allow flag and mode to differ - self.open == other.open - and self.open_kwargs == other.open_kwargs - ) - - def keys(self): - return (ensure_text(k, "ascii") for k in iter(self.db.keys())) - - def __iter__(self): - return self.keys() - - def __len__(self): - return sum(1 for _ in self.keys()) - - def __contains__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - return key in self.db - - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - -class LMDBStore(Store): - """Storage class using LMDB. Requires the `lmdb `_ - package to be installed. - - .. deprecated:: 2.18.0 - LMDBStore will be removed in Zarr-Python 3.0. See - `GH1274 `_ - for more information. - - Parameters - ---------- - path : string - Location of database file. - buffers : bool, optional - If True (default) use support for buffers, which should increase performance by - reducing memory copies. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `lmdb.open` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.LMDBStore('data/array.mdb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] 
= 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.LMDBStore('data/group.mdb') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - - After modifying a DBMStore, the ``close()`` method must be called, otherwise - essential data may not be written to the underlying database file. The - DBMStore class also supports the context manager protocol, which ensures the - ``close()`` method is called on leaving the context, e.g.:: - - >>> with zarr.LMDBStore('data/array.mdb') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - ... z[...] = 42 - ... # no need to call store.close() - - Notes - ----- - By default writes are not immediately flushed to disk to increase performance. You - can ensure data are flushed to disk by calling the ``flush()`` or ``close()`` methods. - - Should be safe to write in multiple threads or processes due to the synchronization - support within LMDB, although writing from multiple processes has not been tested. - - """ - - def __init__( - self, - path, - buffers=True, - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - **kwargs, - ): - import lmdb - - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - # set default memory map size to something larger than the lmdb default, which is - # very likely to be too small for any moderate array (logic copied from zict) - map_size = 2**40 if sys.maxsize >= 2**32 else 2**28 - kwargs.setdefault("map_size", map_size) - - # don't initialize buffers to zero by default, shouldn't be necessary - kwargs.setdefault("meminit", False) - - # decide whether to use the writemap option based on the operating system's - # support for sparse files - writemap requires sparse file support otherwise - # the whole# `map_size` may be reserved up front on disk (logic copied from zict) - writemap = sys.platform.startswith("linux") - kwargs.setdefault("writemap", writemap) - - # decide options for when data are flushed to disk - choose to delay syncing - # data to filesystem, otherwise pay a large performance penalty (zict also does - # this) - kwargs.setdefault("metasync", False) - kwargs.setdefault("sync", False) - kwargs.setdefault("map_async", False) - - # set default option for number of cached transactions - max_spare_txns = multiprocessing.cpu_count() - kwargs.setdefault("max_spare_txns", max_spare_txns) - - # normalize path - path = os.path.abspath(path) - - # open database - self.db = lmdb.open(path, **kwargs) - - # store properties - self.buffers = buffers - self.path = path - self.kwargs = kwargs - self._dimension_separator = dimension_separator - - def __getstate__(self): - try: - self.flush() # just in case - except Exception: - # flush may fail if db has already been closed - pass - return self.path, self.buffers, self.kwargs - - def __setstate__(self, state): - path, buffers, kwargs = state - self.__init__(path=path, buffers=buffers, **kwargs) - - def close(self): - """Closes the underlying database.""" - self.db.close() - - def flush(self): - """Synchronizes data to the file system.""" - self.db.sync() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def __getitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - # use the 
buffers option, should avoid a memory copy - with self.db.begin(buffers=self.buffers) as txn: - value = txn.get(key) - if value is None: - raise KeyError(key) - return value - - def __setitem__(self, key, value): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(write=True, buffers=self.buffers) as txn: - txn.put(key, value) - - def __delitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(write=True) as txn: - if not txn.delete(key): - raise KeyError(key) - - def __contains__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - return cursor.set_key(key) - - def items(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - for k, v in cursor.iternext(keys=True, values=True): - yield ensure_text(k, "ascii"), v - - def keys(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - for k in cursor.iternext(keys=True, values=False): - yield ensure_text(k, "ascii") - - def values(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - yield from cursor.iternext(keys=False, values=True) - - def __iter__(self): - return self.keys() - - def __len__(self): - return self.db.stat()["entries"] - - -class LRUStoreCache(Store): - """Storage class that implements a least-recently-used (LRU) cache layer over - some other store. Intended primarily for use with stores that can be slow to - access, e.g., remote stores that require network communication to store and - retrieve data. - - Parameters - ---------- - store : Store - The store containing the actual data to be cached. - max_size : int - The maximum size that the cache may grow to, in number of bytes. Provide `None` - if you would like the cache to have unlimited size. - - Examples - -------- - The example below wraps an S3 store with an LRU cache:: - - >>> import s3fs - >>> import zarr - >>> s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name='eu-west-2')) - >>> store = s3fs.S3Map(root='zarr-demo/store', s3=s3, check=False) - >>> cache = zarr.LRUStoreCache(store, max_size=2**28) - >>> root = zarr.group(store=cache) # doctest: +REMOTE_DATA - >>> z = root['foo/bar/baz'] # doctest: +REMOTE_DATA - >>> from timeit import timeit - >>> # first data access is relatively slow, retrieved from store - ... timeit('print(z[:].tobytes())', number=1, globals=globals()) # doctest: +SKIP - b'Hello from the cloud!' - 0.1081731989979744 - >>> # second data access is faster, uses cache - ... timeit('print(z[:].tobytes())', number=1, globals=globals()) # doctest: +SKIP - b'Hello from the cloud!' 
- 0.0009490990014455747 - - """ - - def __init__(self, store: StoreLike, max_size: int): - self._store: BaseStore = BaseStore._ensure_store(store) - self._max_size = max_size - self._current_size = 0 - self._keys_cache = None - self._contains_cache: Dict[Any, Any] = {} - self._listdir_cache: Dict[Path, Any] = dict() - self._values_cache: Dict[Path, Any] = OrderedDict() - self._mutex = Lock() - self.hits = self.misses = 0 - - def __getstate__(self): - return ( - self._store, - self._max_size, - self._current_size, - self._keys_cache, - self._contains_cache, - self._listdir_cache, - self._values_cache, - self.hits, - self.misses, - ) - - def __setstate__(self, state): - ( - self._store, - self._max_size, - self._current_size, - self._keys_cache, - self._contains_cache, - self._listdir_cache, - self._values_cache, - self.hits, - self.misses, - ) = state - self._mutex = Lock() - - def __len__(self): - return len(self._keys()) - - def __iter__(self): - return self.keys() - - def __contains__(self, key): - with self._mutex: - if key not in self._contains_cache: - self._contains_cache[key] = key in self._store - return self._contains_cache[key] - - def clear(self): - self._store.clear() - self.invalidate() - - def keys(self): - with self._mutex: - return iter(self._keys()) - - def _keys(self): - if self._keys_cache is None: - self._keys_cache = list(self._store.keys()) - return self._keys_cache - - def listdir(self, path: Path = None): - with self._mutex: - try: - return self._listdir_cache[path] - except KeyError: - listing = listdir(self._store, path) - self._listdir_cache[path] = listing - return listing - - def getsize(self, path=None) -> int: - return getsize(self._store, path=path) - - def _pop_value(self): - # remove the first value from the cache, as this will be the least recently - # used value - _, v = self._values_cache.popitem(last=False) - return v - - def _accommodate_value(self, value_size): - if self._max_size is None: - return - # ensure there is enough space in the cache for a new value - while self._current_size + value_size > self._max_size: - v = self._pop_value() - self._current_size -= buffer_size(v) - - def _cache_value(self, key: Path, value): - # cache a value - value_size = buffer_size(value) - # check size of the value against max size, as if the value itself exceeds max - # size then we are never going to cache it - if self._max_size is None or value_size <= self._max_size: - self._accommodate_value(value_size) - self._values_cache[key] = value - self._current_size += value_size - - def invalidate(self): - """Completely clear the cache.""" - with self._mutex: - self._values_cache.clear() - self._invalidate_keys() - self._current_size = 0 - - def invalidate_values(self): - """Clear the values cache.""" - with self._mutex: - self._values_cache.clear() - - def invalidate_keys(self): - """Clear the keys cache.""" - with self._mutex: - self._invalidate_keys() - - def _invalidate_keys(self): - self._keys_cache = None - self._contains_cache.clear() - self._listdir_cache.clear() - - def _invalidate_value(self, key): - if key in self._values_cache: - value = self._values_cache.pop(key) - self._current_size -= buffer_size(value) - - def __getitem__(self, key): - try: - # first try to obtain the value from the cache - with self._mutex: - value = self._values_cache[key] - # cache hit if no KeyError is raised - self.hits += 1 - # treat the end as most recently used - self._values_cache.move_to_end(key) - - except KeyError: - # cache miss, retrieve value from the store - value = 
self._store[key] - with self._mutex: - self.misses += 1 - # need to check if key is not in the cache, as it may have been cached - # while we were retrieving the value from the store - if key not in self._values_cache: - self._cache_value(key, value) - - return value - - def __setitem__(self, key, value): - self._store[key] = value - with self._mutex: - self._invalidate_keys() - self._invalidate_value(key) - self._cache_value(key, value) - - def __delitem__(self, key): - del self._store[key] - with self._mutex: - self._invalidate_keys() - self._invalidate_value(key) - - -class SQLiteStore(Store): - """Storage class using SQLite. - - .. deprecated:: 2.18.0 - SQLiteStore will be removed in Zarr-Python 3.0. See - `GH1274 `_ - for more information. - - Parameters - ---------- - path : string - Location of database file. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `sqlite3.connect` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.SQLiteStore('data/array.sqldb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.SQLiteStore('data/group.sqldb') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - """ - - def __init__(self, path, dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **kwargs): - import sqlite3 - - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - self._dimension_separator = dimension_separator - - # normalize path - if path != ":memory:": - path = os.path.abspath(path) - - # store properties - self.path = path - self.kwargs = kwargs - - # allow threading if SQLite connections are thread-safe - # - # ref: https://www.sqlite.org/releaselog/3_3_1.html - # ref: https://github.com/python/cpython/issues/71377 - check_same_thread = True - if sqlite3.sqlite_version_info >= (3, 3, 1): - check_same_thread = False - - # keep a lock for serializing mutable operations - self.lock = Lock() - - # open database - self.db = sqlite3.connect( - self.path, - detect_types=0, - isolation_level=None, - check_same_thread=check_same_thread, - **self.kwargs, - ) - - # handle keys as `str`s - self.db.text_factory = str - - # get a cursor to read/write to the database - self.cursor = self.db.cursor() - - # initialize database with our table if missing - with self.lock: - self.cursor.execute("CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)") - - def __getstate__(self): - if self.path == ":memory:": - raise PicklingError("Cannot pickle in-memory SQLite databases") - return self.path, self.kwargs - - def __setstate__(self, state): - path, kwargs = state - self.__init__(path=path, **kwargs) - - def close(self): - """Closes the underlying database.""" - - # close cursor and db objects - self.cursor.close() - self.db.close() - - def __getitem__(self, key): - value = self.cursor.execute("SELECT v FROM zarr WHERE (k = ?)", (key,)) - for (v,) in value: - return v - raise KeyError(key) - - def __setitem__(self, key, value): - self.update({key: value}) - - def __delitem__(self, key): - with self.lock: - self.cursor.execute("DELETE FROM zarr WHERE (k = 
?)", (key,)) - if self.cursor.rowcount < 1: - raise KeyError(key) - - def __contains__(self, key): - cs = self.cursor.execute("SELECT COUNT(*) FROM zarr WHERE (k = ?)", (key,)) - for (has,) in cs: - has = bool(has) - return has - - def items(self): - kvs = self.cursor.execute("SELECT k, v FROM zarr") - yield from kvs - - def keys(self): - ks = self.cursor.execute("SELECT k FROM zarr") - for (k,) in ks: - yield k - - def values(self): - vs = self.cursor.execute("SELECT v FROM zarr") - for (v,) in vs: - yield v - - def __iter__(self): - return self.keys() - - def __len__(self): - cs = self.cursor.execute("SELECT COUNT(*) FROM zarr") - for (c,) in cs: - return c - - def update(self, *args, **kwargs): - args += (kwargs,) - - kv_list = [] - for dct in args: - for k, v in dct.items(): - v = ensure_contiguous_ndarray_like(v) - - # Accumulate key-value pairs for storage - kv_list.append((k, v)) - - with self.lock: - self.cursor.executemany("REPLACE INTO zarr VALUES (?, ?)", kv_list) - - def listdir(self, path=None): - path = normalize_storage_path(path) - sep = "_" if path == "" else "/" - keys = self.cursor.execute( - f""" - SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM ( - SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m - FROM zarr WHERE k LIKE (? || "{sep}%") - ) ORDER BY l ASC - """, - (path, path), - ) - keys = list(map(operator.itemgetter(0), keys)) - return keys - - def getsize(self, path=None): - path = normalize_storage_path(path) - size = self.cursor.execute( - """ - SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr - WHERE k LIKE (? || "%") AND - 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - """, - (path, path), - ) - for (s,) in size: - return s - - def rmdir(self, path=None): - path = normalize_storage_path(path) - if path: - with self.lock: - self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)) - else: - self.clear() - - def clear(self): - with self.lock: - self.cursor.executescript( - """ - BEGIN TRANSACTION; - DROP TABLE zarr; - CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB); - COMMIT TRANSACTION; - """ - ) - - -class MongoDBStore(Store): - """Storage class using MongoDB. - - .. note:: This is an experimental feature. - - .. deprecated:: 2.18.0 - MongoDBStore will be removed in Zarr-Python 3.0. See - `GH1274 `_ - for more information. - - Requires the `pymongo `_ - package to be installed. - - Parameters - ---------- - database : string - Name of database - collection : string - Name of collection - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `pymongo.MongoClient` function. - - Notes - ----- - The maximum chunksize in MongoDB documents is 16 MB. 
- - """ - - _key = "key" - _value = "value" - - def __init__( - self, - database="mongodb_zarr", - collection="zarr_collection", - dimension_separator: Optional[DIMENSION_SEPARATOR] = None, - **kwargs, - ): - import pymongo - - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - self._database = database - self._collection = collection - self._dimension_separator = dimension_separator - self._kwargs = kwargs - - self.client = pymongo.MongoClient(**self._kwargs) - self.db = self.client.get_database(self._database) - self.collection = self.db.get_collection(self._collection) - - def __getitem__(self, key): - doc = self.collection.find_one({self._key: key}) - - if doc is None: - raise KeyError(key) - else: - return doc[self._value] - - def __setitem__(self, key, value): - value = ensure_bytes(value) - self.collection.replace_one( - {self._key: key}, {self._key: key, self._value: value}, upsert=True - ) - - def __delitem__(self, key): - result = self.collection.delete_many({self._key: key}) - if not result.deleted_count == 1: - raise KeyError(key) - - def __iter__(self): - for f in self.collection.find({}): - yield f[self._key] - - def __len__(self): - return self.collection.count_documents({}) - - def __getstate__(self): - return self._database, self._collection, self._kwargs - - def __setstate__(self, state): - database, collection, kwargs = state - self.__init__(database=database, collection=collection, **kwargs) - - def close(self): - """Cleanup client resources and disconnect from MongoDB.""" - self.client.close() - - def clear(self): - """Remove all items from store.""" - self.collection.delete_many({}) - - -class RedisStore(Store): - """Storage class using Redis. - - .. note:: This is an experimental feature. - - .. deprecated:: 2.18.0 - RedisStore will be removed in Zarr-Python 3.0. See - `GH1274 `_ - for more information. - - Requires the `redis `_ - package to be installed. - - Parameters - ---------- - prefix : string - Name of prefix for Redis keys - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `redis.Redis` function. 
- - """ - - def __init__( - self, prefix="zarr", dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **kwargs - ): - import redis - - warnings.warn( - V3_DEPRECATION_MESSAGE.format(store=self.__class__.__name__), - FutureWarning, - stacklevel=2, - ) - - self._prefix = prefix - self._kwargs = kwargs - self._dimension_separator = dimension_separator - - self.client = redis.Redis(**kwargs) - - def _key(self, key): - return f"{self._prefix}:{key}" - - def __getitem__(self, key): - return self.client[self._key(key)] - - def __setitem__(self, key, value): - value = ensure_bytes(value) - self.client[self._key(key)] = value - - def __delitem__(self, key): - count = self.client.delete(self._key(key)) - if not count: - raise KeyError(key) - - def keylist(self): - offset = len(self._key("")) # length of prefix - return [key[offset:].decode("utf-8") for key in self.client.keys(self._key("*"))] - - def keys(self): - yield from self.keylist() - - def __iter__(self): - yield from self.keys() - - def __len__(self): - return len(self.keylist()) - - def __getstate__(self): - return self._prefix, self._kwargs - - def __setstate__(self, state): - prefix, kwargs = state - self.__init__(prefix=prefix, **kwargs) - - def clear(self): - for key in self.keys(): - del self[key] - - -class ConsolidatedMetadataStore(Store): - """A layer over other storage, where the metadata has been consolidated into - a single key. - - The purpose of this class, is to be able to get all of the metadata for - a given array in a single read operation from the underlying storage. - See :func:`zarr.convenience.consolidate_metadata` for how to create this - single metadata key. - - This class loads from the one key, and stores the data in a dict, so that - accessing the keys no longer requires operations on the backend store. - - This class is read-only, and attempts to change the array metadata will - fail, but changing the data is possible. If the backend storage is changed - directly, then the metadata stored here could become obsolete, and - :func:`zarr.convenience.consolidate_metadata` should be called again and the class - re-invoked. The use case is for write once, read many times. - - .. versionadded:: 2.3 - - .. note:: This is an experimental feature. - - Parameters - ---------- - store: Store - Containing the zarr array. - metadata_key: str - The target in the store where all of the metadata are stored. We - assume JSON encoding. 
- - See Also - -------- - zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated - - """ - - def __init__(self, store: StoreLike, metadata_key=".zmetadata"): - self.store = Store._ensure_store(store) - - # retrieve consolidated metadata - meta = json_loads(self.store[metadata_key]) - - # check format of consolidated metadata - consolidated_format = meta.get("zarr_consolidated_format", None) - if consolidated_format != 1: - raise MetadataError( - f"unsupported zarr consolidated metadata format: {consolidated_format}" - ) - - # decode metadata - self.meta_store: Store = KVStore(meta["metadata"]) - - def __getitem__(self, key): - return self.meta_store[key] - - def __contains__(self, item): - return item in self.meta_store - - def __iter__(self): - return iter(self.meta_store) - - def __len__(self): - return len(self.meta_store) - - def __delitem__(self, key): - raise ReadOnlyError() - - def __setitem__(self, key, value): - raise ReadOnlyError() - - def getsize(self, path): - return getsize(self.meta_store, path) - - def listdir(self, path): - return listdir(self.meta_store, path) diff --git a/zarr/sync.py b/zarr/sync.py deleted file mode 100644 index ba1c5df5b3..0000000000 --- a/zarr/sync.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -from collections import defaultdict -from threading import Lock -from typing import Protocol - - -class Synchronizer(Protocol): - """Base class for synchronizers.""" - - def __getitem__(self, item): - # see subclasses - ... - - -class ThreadSynchronizer(Synchronizer): - """Provides synchronization using thread locks.""" - - def __init__(self): - self.mutex = Lock() - self.locks = defaultdict(Lock) - - def __getitem__(self, item): - with self.mutex: - return self.locks[item] - - def __getstate__(self): - return True - - def __setstate__(self, *args): - # reinitialize from scratch - self.__init__() - - -class ProcessSynchronizer(Synchronizer): - """Provides synchronization using file locks via the - `fasteners `_ - package. - - Parameters - ---------- - path : string - Path to a directory on a file system that is shared by all processes. - N.B., this should be a *different* path to where you store the array. 
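A minimal sketch of the point above, keeping the lock directory separate from the array data; the paths are illustrative and the ``fasteners`` package must be installed::

    import zarr

    synchronizer = zarr.ProcessSynchronizer('data/example.sync')  # lock files only
    z = zarr.open_array(
        'data/example.zarr',  # array data lives elsewhere
        mode='a',
        shape=(10000, 10000),
        chunks=(1000, 1000),
        synchronizer=synchronizer,
    )
    z[0:1000, 0:1000] = 1  # chunk writes are protected by inter-process file locks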
- - """ - - def __init__(self, path): - self.path = path - - def __getitem__(self, item): - import fasteners - - path = os.path.join(self.path, item) - lock = fasteners.InterProcessLock(path) - return lock - - # pickling and unpickling should be handled automatically diff --git a/zarr/tests/conftest.py b/zarr/tests/conftest.py deleted file mode 100644 index aa73b8691e..0000000000 --- a/zarr/tests/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -import pathlib - -import pytest - - -@pytest.fixture(params=[str, pathlib.Path]) -def path_type(request): - return request.param diff --git a/zarr/tests/data/store.zip b/zarr/tests/data/store.zip deleted file mode 100644 index 76ba856c62..0000000000 Binary files a/zarr/tests/data/store.zip and /dev/null differ diff --git a/zarr/tests/data/store/foo b/zarr/tests/data/store/foo deleted file mode 100644 index ba0e162e1c..0000000000 --- a/zarr/tests/data/store/foo +++ /dev/null @@ -1 +0,0 @@ -bar \ No newline at end of file diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py deleted file mode 100644 index 2d9553971b..0000000000 --- a/zarr/tests/test_attrs.py +++ /dev/null @@ -1,298 +0,0 @@ -import json - -import pathlib -import pytest - -import zarr -from zarr._storage.store import meta_root -from zarr.attrs import Attributes -from zarr.storage import KVStore, DirectoryStore -from zarr._storage.v3 import KVStoreV3 -from zarr.tests.util import CountingDict, CountingDictV3 -from zarr.hierarchy import group - - -@pytest.fixture(params=[2, 3]) -def zarr_version(request): - return request.param - - -def _init_store(version): - """Use a plain dict() for v2, but KVStoreV3 otherwise.""" - if version == 2: - return dict() - return KVStoreV3(dict()) - - -class TestAttributes: - def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): - root = ".z" if zarr_version == 2 else meta_root - return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) - - def test_storage(self, zarr_version): - store = _init_store(zarr_version) - root = ".z" if zarr_version == 2 else meta_root - attrs_key = root + "attrs" - a = Attributes(store=store, key=attrs_key) - assert isinstance(a.store, KVStore) - assert "foo" not in a - assert "bar" not in a - assert dict() == a.asdict() - - a["foo"] = "bar" - a["baz"] = 42 - assert attrs_key in store - assert isinstance(store[attrs_key], bytes) - d = json.loads(str(store[attrs_key], "utf-8")) - if zarr_version == 3: - d = d["attributes"] - assert dict(foo="bar", baz=42) == d - - def test_utf8_encoding(self, zarr_version): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent - fixdir = project_root / "fixture" - testdir = fixdir / "utf8attrs" - if not testdir.exists(): # pragma: no cover - # store the data - should be one-time operation - testdir.mkdir(parents=True, exist_ok=True) - with (testdir / ".zattrs").open("w", encoding="utf-8") as f: - f.write('{"foo": "た"}') - with (testdir / ".zgroup").open("w", encoding="utf-8") as f: - f.write("""{\n "zarr_format": 2\n}""") - - # fixture data - fixture = group(store=DirectoryStore(str(fixdir))) - assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") - - def test_get_set_del_contains(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) - assert "foo" not in a - a["foo"] = "bar" - a["baz"] = 42 - assert "foo" in a - assert "baz" in a - assert "bar" == a["foo"] - assert 42 == a["baz"] - del a["foo"] - assert "foo" not in a - with pytest.raises(KeyError): - # 
noinspection PyStatementEffect - a["foo"] - - def test_update_put(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) - assert "foo" not in a - assert "bar" not in a - assert "baz" not in a - - a.update(foo="spam", bar=42, baz=4.2) - assert a["foo"] == "spam" - assert a["bar"] == 42 - assert a["baz"] == 4.2 - - a.put(dict(foo="eggs", bar=84)) - assert a["foo"] == "eggs" - assert a["bar"] == 84 - assert "baz" not in a - - def test_iterators(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) - assert 0 == len(a) - assert set() == set(a) - assert set() == set(a.keys()) - assert set() == set(a.values()) - assert set() == set(a.items()) - - a["foo"] = "bar" - a["baz"] = 42 - - assert 2 == len(a) - assert {"foo", "baz"} == set(a) - assert {"foo", "baz"} == set(a.keys()) - assert {"bar", 42} == set(a.values()) - assert {("foo", "bar"), ("baz", 42)} == set(a.items()) - - def test_read_only(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, read_only=True, zarr_version=zarr_version) - if zarr_version == 2: - store[".zattrs"] = json.dumps(dict(foo="bar", baz=42)).encode("ascii") - else: - store["meta/root/attrs"] = json.dumps(dict(attributes=dict(foo="bar", baz=42))).encode( - "ascii" - ) - assert a["foo"] == "bar" - assert a["baz"] == 42 - with pytest.raises(PermissionError): - a["foo"] = "quux" - with pytest.raises(PermissionError): - del a["foo"] - with pytest.raises(PermissionError): - a.update(foo="quux") - - def test_key_completions(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) - d = a._ipython_key_completions_() - assert "foo" not in d - assert "123" not in d - assert "baz" not in d - assert "asdf;" not in d - a["foo"] = 42 - a["123"] = 4.2 - a["asdf;"] = "ghjkl;" - d = a._ipython_key_completions_() - assert "foo" in d - assert "123" in d - assert "asdf;" in d - assert "baz" not in d - - def test_caching_on(self, zarr_version): - # caching is turned on by default - - # setup store - store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" - assert 0 == store.counter["__getitem__", attrs_key] - assert 0 == store.counter["__setitem__", attrs_key] - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") - assert 0 == store.counter["__getitem__", attrs_key] - assert 1 == store.counter["__setitem__", attrs_key] - - # setup attributes - a = self.init_attributes(store, zarr_version=zarr_version) - - # test __getitem__ causes all attributes to be cached - assert a["foo"] == "xxx" - assert 1 == store.counter["__getitem__", attrs_key] - assert a["bar"] == 42 - assert 1 == store.counter["__getitem__", attrs_key] - assert a["foo"] == "xxx" - assert 1 == store.counter["__getitem__", attrs_key] - - # test __setitem__ updates the cache - a["foo"] = "yyy" - get_cnt = 2 if zarr_version == 2 else 3 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 2 == store.counter["__setitem__", attrs_key] - assert a["foo"] == "yyy" - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 2 == store.counter["__setitem__", attrs_key] - - # test update() updates the cache - a.update(foo="zzz", bar=84) - get_cnt = 3 if zarr_version == 2 else 5 - 
assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - assert a["foo"] == "zzz" - assert a["bar"] == 84 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - - # test __contains__ uses the cache - assert "foo" in a - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - assert "spam" not in a - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - - # test __delitem__ updates the cache - del a["bar"] - get_cnt = 4 if zarr_version == 2 else 7 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 4 == store.counter["__setitem__", attrs_key] - assert "bar" not in a - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 4 == store.counter["__setitem__", attrs_key] - - # test refresh() - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") - assert get_cnt == store.counter["__getitem__", attrs_key] - a.refresh() - get_cnt = 5 if zarr_version == 2 else 8 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert a["foo"] == "xxx" - assert get_cnt == store.counter["__getitem__", attrs_key] - assert a["bar"] == 42 - assert get_cnt == store.counter["__getitem__", attrs_key] - - def test_caching_off(self, zarr_version): - # setup store - store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" - assert 0 == store.counter["__getitem__", attrs_key] - assert 0 == store.counter["__setitem__", attrs_key] - - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") - assert 0 == store.counter["__getitem__", attrs_key] - assert 1 == store.counter["__setitem__", attrs_key] - - # setup attributes - a = self.init_attributes(store, cache=False, zarr_version=zarr_version) - - # test __getitem__ - assert a["foo"] == "xxx" - assert 1 == store.counter["__getitem__", attrs_key] - assert a["bar"] == 42 - assert 2 == store.counter["__getitem__", attrs_key] - assert a["foo"] == "xxx" - assert 3 == store.counter["__getitem__", attrs_key] - - # test __setitem__ - a["foo"] = "yyy" - get_cnt = 4 if zarr_version == 2 else 5 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 2 == store.counter["__setitem__", attrs_key] - assert a["foo"] == "yyy" - get_cnt = 5 if zarr_version == 2 else 6 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 2 == store.counter["__setitem__", attrs_key] - - # test update() - a.update(foo="zzz", bar=84) - get_cnt = 6 if zarr_version == 2 else 8 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - assert a["foo"] == "zzz" - assert a["bar"] == 84 - get_cnt = 8 if zarr_version == 2 else 10 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - - # test __contains__ - assert "foo" in a - get_cnt = 9 if zarr_version == 2 else 11 - assert get_cnt == store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - assert "spam" not in a - get_cnt = 10 if zarr_version == 2 else 12 - assert get_cnt 
== store.counter["__getitem__", attrs_key] - assert 3 == store.counter["__setitem__", attrs_key] - - def test_wrong_keys(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) - - warning_msg = "only attribute keys of type 'string' will be allowed in the future" - - with pytest.warns(DeprecationWarning, match=warning_msg): - a[1] = "foo" - - with pytest.warns(DeprecationWarning, match=warning_msg): - a.put({1: "foo"}) - - with pytest.warns(DeprecationWarning, match=warning_msg): - a.update({1: "foo"}) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py deleted file mode 100644 index 7d190adc2c..0000000000 --- a/zarr/tests/test_convenience.py +++ /dev/null @@ -1,1047 +0,0 @@ -import atexit -import tempfile -import unittest -from numbers import Integral - -import numpy as np -import pytest -from numcodecs import Adler32, Zlib -from numpy.testing import assert_array_equal - -import zarr -from zarr.convenience import ( - consolidate_metadata, - copy, - copy_store, - load, - open, - open_consolidated, - save, - save_group, - save_array, - copy_all, -) -from zarr.core import Array -from zarr.errors import CopyError -from zarr.hierarchy import Group, group -from zarr.storage import ( - ConsolidatedMetadataStore, - FSStore, - KVStore, - MemoryStore, - atexit_rmtree, - data_root, - meta_root, - getsize, -) -from zarr._storage.store import v3_api_available -from zarr._storage.v3 import ( - ConsolidatedMetadataStoreV3, - DirectoryStoreV3, - FSStoreV3, - KVStoreV3, - MemoryStoreV3, - SQLiteStoreV3, -) -from zarr.tests.util import have_fsspec - -_VERSIONS = (2, 3) if v3_api_available else (2,) - - -def _init_creation_kwargs(zarr_version): - kwargs = {"zarr_version": zarr_version} - if zarr_version == 3: - kwargs["path"] = "dataset" - return kwargs - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_array(path_type, zarr_version): - store = tempfile.mkdtemp() - atexit.register(atexit_rmtree, store) - store = path_type(store) - kwargs = _init_creation_kwargs(zarr_version) - - # open array, create if doesn't exist - z = open(store, mode="a", shape=100, **kwargs) - assert isinstance(z, Array) - assert z.shape == (100,) - - # open array, overwrite - z = open(store, mode="w", shape=200, **kwargs) - assert isinstance(z, Array) - assert z.shape == (200,) - - # open array, read-only - z = open(store, mode="r", **kwargs) - assert isinstance(z, Array) - assert z.shape == (200,) - assert z.read_only - - # path not found - with pytest.raises(ValueError): - open("doesnotexist", mode="r") - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_group(path_type, zarr_version): - store = tempfile.mkdtemp() - atexit.register(atexit_rmtree, store) - store = path_type(store) - kwargs = _init_creation_kwargs(zarr_version) - - # open group, create if doesn't exist - g = open(store, mode="a", **kwargs) - g.create_group("foo") - assert isinstance(g, Group) - assert "foo" in g - - # open group, overwrite - g = open(store, mode="w", **kwargs) - assert isinstance(g, Group) - assert "foo" not in g - - # open group, read-only - g = open(store, mode="r", **kwargs) - assert isinstance(g, Group) - assert g.read_only - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_save_errors(zarr_version): - with pytest.raises(ValueError): - # no arrays provided - save_group("data/group.zarr", zarr_version=zarr_version) - with pytest.raises(TypeError): - # no array provided - save_array("data/group.zarr", 
zarr_version=zarr_version) - with pytest.raises(ValueError): - # no arrays provided - save("data/group.zarr", zarr_version=zarr_version) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_zarr_v3_save_multiple_unnamed(): - x = np.ones(8) - y = np.zeros(8) - store = KVStoreV3(dict()) - # no path provided - save_group(store, x, y, path="dataset", zarr_version=3) - # names become arr_{i} for unnamed *args - assert data_root + "dataset/arr_0/c0" in store - assert data_root + "dataset/arr_1/c0" in store - assert meta_root + "dataset/arr_0.array.json" in store - assert meta_root + "dataset/arr_1.array.json" in store - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_zarr_v3_save_errors(): - x = np.ones(8) - with pytest.raises(ValueError): - # no path provided - save_group("data/group.zr3", x, zarr_version=3) - with pytest.raises(ValueError): - # no path provided - save_array("data/group.zr3", x, zarr_version=3) - with pytest.raises(ValueError): - # no path provided - save("data/group.zr3", x, zarr_version=3) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_lazy_loader(zarr_version): - foo = np.arange(100) - bar = np.arange(100, 0, -1) - store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" - kwargs = _init_creation_kwargs(zarr_version) - save(store, foo=foo, bar=bar, **kwargs) - loader = load(store, **kwargs) - assert "foo" in loader - assert "bar" in loader - assert "baz" not in loader - assert len(loader) == 2 - assert sorted(loader) == ["bar", "foo"] - assert_array_equal(foo, loader["foo"]) - assert_array_equal(bar, loader["bar"]) - assert "LazyLoader: " in repr(loader) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_load_array(zarr_version): - foo = np.arange(100) - bar = np.arange(100, 0, -1) - store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" - kwargs = _init_creation_kwargs(zarr_version) - save(store, foo=foo, bar=bar, **kwargs) - - # can also load arrays directly into a numpy array - for array_name in ["foo", "bar"]: - array_path = "dataset/" + array_name if zarr_version == 3 else array_name - array = load(store, path=array_path, zarr_version=zarr_version) - assert isinstance(array, np.ndarray) - if array_name == "foo": - assert_array_equal(foo, array) - else: - assert_array_equal(bar, array) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_tree(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) - g1 = zarr.group(**kwargs) - g1.create_group("foo") - g3 = g1.create_group("bar") - g3.create_group("baz") - g5 = g3.create_group("qux") - g5.create_dataset("baz", shape=100, chunks=10) - assert repr(zarr.tree(g1)) == repr(g1.tree()) - assert str(zarr.tree(g1)) == str(g1.tree()) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("stores_from_path", [False, True]) -@pytest.mark.parametrize( - "with_chunk_store,listable", - [(False, True), (True, True), (False, False)], - ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], -) -def test_consolidate_metadata( - with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path -): - # setup initial data - if stores_from_path: - store = tempfile.mkdtemp() - atexit.register(atexit_rmtree, store) - if with_chunk_store: - chunk_store = tempfile.mkdtemp() - atexit.register(atexit_rmtree, chunk_store) - else: - chunk_store = None - version_kwarg = {"zarr_version": zarr_version} - else: - if zarr_version == 2: - store = MemoryStore() - 
chunk_store = MemoryStore() if with_chunk_store else None - elif zarr_version == 3: - store = MemoryStoreV3() - chunk_store = MemoryStoreV3() if with_chunk_store else None - version_kwarg = {} - path = "dataset" if zarr_version == 3 else None - z = group(store, chunk_store=chunk_store, path=path, **version_kwarg) - - # Reload the actual store implementation in case str - store_to_copy = z.store - - z.create_group("g1") - g2 = z.create_group("g2") - g2.attrs["hello"] = "world" - arr = g2.create_dataset("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") - assert 16 == arr.nchunks - assert 0 == arr.nchunks_initialized - arr.attrs["data"] = 1 - arr[:] = 1.0 - assert 16 == arr.nchunks_initialized - - if stores_from_path: - # get the actual store class for use with consolidate_metadata - store_class = z._store - else: - store_class = store - - if zarr_version == 3: - # error on v3 if path not provided - with pytest.raises(ValueError): - consolidate_metadata(store_class, path=None) - - with pytest.raises(ValueError): - consolidate_metadata(store_class, path="") - - # perform consolidation - out = consolidate_metadata(store_class, path=path) - assert isinstance(out, Group) - assert ["g1", "g2"] == list(out) - if not stores_from_path: - if zarr_version == 2: - assert isinstance(out._store, ConsolidatedMetadataStore) - assert ".zmetadata" in store - meta_keys = [ - ".zgroup", - "g1/.zgroup", - "g2/.zgroup", - "g2/.zattrs", - "g2/arr/.zarray", - "g2/arr/.zattrs", - ] - else: - assert isinstance(out._store, ConsolidatedMetadataStoreV3) - assert "meta/root/consolidated/.zmetadata" in store - meta_keys = [ - "zarr.json", - meta_root + "dataset.group.json", - meta_root + "dataset/g1.group.json", - meta_root + "dataset/g2.group.json", - meta_root + "dataset/g2/arr.array.json", - "meta/root/consolidated.group.json", - ] - for key in meta_keys: - del store[key] - - # https://github.com/zarr-developers/zarr-python/issues/993 - # Make sure we can still open consolidated on an unlistable store: - if not listable: - fs_memory = pytest.importorskip("fsspec.implementations.memory") - monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) - monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") - fs = fs_memory.MemoryFileSystem() - if zarr_version == 2: - store_to_open = FSStore("", fs=fs) - else: - store_to_open = FSStoreV3("", fs=fs) - - # copy original store to new unlistable store - store_to_open.update(store_to_copy) - - else: - store_to_open = store - - # open consolidated - z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path, **version_kwarg) - assert ["g1", "g2"] == list(z2) - assert "world" == z2.g2.attrs["hello"] - assert 1 == z2.g2.arr.attrs["data"] - assert (z2.g2.arr[:] == 1.0).all() - assert 16 == z2.g2.arr.nchunks - if listable: - assert 16 == z2.g2.arr.nchunks_initialized - else: - with pytest.raises(NotImplementedError): - _ = z2.g2.arr.nchunks_initialized - - if stores_from_path: - # path string is note a BaseStore subclass so cannot be used to - # initialize a ConsolidatedMetadataStore. 
- if zarr_version == 2: - with pytest.raises(ValueError): - cmd = ConsolidatedMetadataStore(store) - elif zarr_version == 3: - with pytest.raises(ValueError): - cmd = ConsolidatedMetadataStoreV3(store) - else: - # tests del/write on the store - if zarr_version == 2: - cmd = ConsolidatedMetadataStore(store) - with pytest.raises(PermissionError): - del cmd[".zgroup"] - with pytest.raises(PermissionError): - cmd[".zgroup"] = None - else: - cmd = ConsolidatedMetadataStoreV3(store) - with pytest.raises(PermissionError): - del cmd[meta_root + "dataset.group.json"] - with pytest.raises(PermissionError): - cmd[meta_root + "dataset.group.json"] = None - - # test getsize on the store - assert isinstance(getsize(cmd), Integral) - - # test new metadata are not writeable - with pytest.raises(PermissionError): - z2.create_group("g3") - with pytest.raises(PermissionError): - z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") - with pytest.raises(PermissionError): - del z2["g2"] - - # test consolidated metadata are not writeable - with pytest.raises(PermissionError): - z2.g2.attrs["hello"] = "universe" - with pytest.raises(PermissionError): - z2.g2.arr.attrs["foo"] = "bar" - - # test the data are writeable - z2.g2.arr[:] = 2 - assert (z2.g2.arr[:] == 2).all() - - # test invalid modes - with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) - with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) - with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) - - # make sure keyword arguments are passed through without error - open_consolidated( - store, - chunk_store=chunk_store, - path=path, - cache_attrs=True, - synchronizer=None, - **version_kwarg, - ) - - -@pytest.mark.parametrize( - "options", - ( - {"dimension_separator": "/"}, - {"dimension_separator": "."}, - {"dimension_separator": None}, - ), -) -def test_save_array_separator(tmpdir, options): - data = np.arange(6).reshape((3, 2)) - url = tmpdir.join("test.zarr") - save_array(url, data, **options) - - -class TestCopyStore(unittest.TestCase): - _version = 2 - - def setUp(self): - source = dict() - source["foo"] = b"xxx" - source["bar/baz"] = b"yyy" - source["bar/qux"] = b"zzz" - self.source = source - - def _get_dest_store(self): - return dict() - - def test_no_paths(self): - source = self.source - dest = self._get_dest_store() - copy_store(source, dest) - assert len(source) == len(dest) - for key in source: - assert source[key] == dest[key] - - def test_source_path(self): - source = self.source - # paths should be normalized - for source_path in "bar", "bar/", "/bar", "/bar/": - dest = self._get_dest_store() - copy_store(source, dest, source_path=source_path) - assert 2 == len(dest) - for key in source: - if key.startswith("bar/"): - dest_key = key.split("bar/")[1] - assert source[key] == dest[dest_key] - else: - assert key not in dest - - def test_dest_path(self): - source = self.source - # paths should be normalized - for dest_path in "new", "new/", "/new", "/new/": - dest = self._get_dest_store() - copy_store(source, dest, dest_path=dest_path) - assert len(source) == len(dest) - for key in source: - if self._version == 3: - dest_key = key[:10] + "new/" + key[10:] - else: - dest_key = "new/" + key - assert source[key] == dest[dest_key] - - def test_source_dest_path(self): - source = self.source - # paths should be normalized - for source_path in "bar", "bar/", "/bar", "/bar/": - for dest_path in 
"new", "new/", "/new", "/new/": - dest = self._get_dest_store() - copy_store(source, dest, source_path=source_path, dest_path=dest_path) - assert 2 == len(dest) - for key in source: - if key.startswith("bar/"): - dest_key = "new/" + key.split("bar/")[1] - assert source[key] == dest[dest_key] - else: - assert key not in dest - assert ("new/" + key) not in dest - - def test_excludes_includes(self): - source = self.source - - # single excludes - dest = self._get_dest_store() - excludes = "f.*" - copy_store(source, dest, excludes=excludes) - assert len(dest) == 2 - - root = "" if self._version == 2 else meta_root - assert root + "foo" not in dest - - # multiple excludes - dest = self._get_dest_store() - excludes = "b.z", ".*x" - copy_store(source, dest, excludes=excludes) - assert len(dest) == 1 - assert root + "foo" in dest - assert root + "bar/baz" not in dest - assert root + "bar/qux" not in dest - - # excludes and includes - dest = self._get_dest_store() - excludes = "b.*" - includes = ".*x" - copy_store(source, dest, excludes=excludes, includes=includes) - assert len(dest) == 2 - assert root + "foo" in dest - assert root + "bar/baz" not in dest - assert root + "bar/qux" in dest - - def test_dry_run(self): - source = self.source - dest = self._get_dest_store() - copy_store(source, dest, dry_run=True) - assert 0 == len(dest) - - def test_if_exists(self): - source = self.source - dest = self._get_dest_store() - root = "" if self._version == 2 else meta_root - dest[root + "bar/baz"] = b"mmm" - - # default ('raise') - with pytest.raises(CopyError): - copy_store(source, dest) - - # explicit 'raise' - with pytest.raises(CopyError): - copy_store(source, dest, if_exists="raise") - - # skip - copy_store(source, dest, if_exists="skip") - assert 3 == len(dest) - assert dest[root + "foo"] == b"xxx" - assert dest[root + "bar/baz"] == b"mmm" - assert dest[root + "bar/qux"] == b"zzz" - - # replace - copy_store(source, dest, if_exists="replace") - assert 3 == len(dest) - assert dest[root + "foo"] == b"xxx" - assert dest[root + "bar/baz"] == b"yyy" - assert dest[root + "bar/qux"] == b"zzz" - - # invalid option - with pytest.raises(ValueError): - copy_store(source, dest, if_exists="foobar") - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestCopyStoreV3(TestCopyStore): - _version = 3 - - def setUp(self): - source = KVStoreV3(dict()) - source["meta/root/foo"] = b"xxx" - source["meta/root/bar/baz"] = b"yyy" - source["meta/root/bar/qux"] = b"zzz" - self.source = source - - def _get_dest_store(self): - return KVStoreV3(dict()) - - def test_mismatched_store_versions(self): - # cannot copy between stores of mixed Zarr versions - dest = KVStore(dict()) - with pytest.raises(ValueError): - copy_store(self.source, dest) - - -def check_copied_array(original, copied, without_attrs=False, expect_props=None): - # setup - source_h5py = original.__module__.startswith("h5py.") - dest_h5py = copied.__module__.startswith("h5py.") - zarr_to_zarr = not (source_h5py or dest_h5py) - h5py_to_h5py = source_h5py and dest_h5py - zarr_to_h5py = not source_h5py and dest_h5py - h5py_to_zarr = source_h5py and not dest_h5py - if expect_props is None: - expect_props = dict() - else: - expect_props = expect_props.copy() - - # common properties in zarr and h5py - for p in "dtype", "shape", "chunks": - expect_props.setdefault(p, getattr(original, p)) - - # zarr-specific properties - if zarr_to_zarr: - for p in "compressor", "filters", "order", "fill_value": - expect_props.setdefault(p, getattr(original, p)) - - 
# h5py-specific properties - if h5py_to_h5py: - for p in ( - "maxshape", - "compression", - "compression_opts", - "shuffle", - "scaleoffset", - "fletcher32", - "fillvalue", - ): - expect_props.setdefault(p, getattr(original, p)) - - # common properties with some name differences - if h5py_to_zarr: - expect_props.setdefault("fill_value", original.fillvalue) - if zarr_to_h5py: - expect_props.setdefault("fillvalue", original.fill_value) - - # compare properties - for k, v in expect_props.items(): - assert v == getattr(copied, k) - - # compare data - assert_array_equal(original[:], copied[:]) - - # compare attrs - if without_attrs: - for k in original.attrs.keys(): - assert k not in copied.attrs - else: - if dest_h5py and "filters" in original.attrs: - # special case in v3 (storing filters metadata under attributes) - # we explicitly do not copy this info over to HDF5 - original_attrs = original.attrs.asdict().copy() - original_attrs.pop("filters") - else: - original_attrs = original.attrs - assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) - - -def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): - # setup - if expect_props is None: - expect_props = dict() - else: - expect_props = expect_props.copy() - - # compare children - for k, v in original.items(): - if hasattr(v, "shape"): - assert k in copied - check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) - elif shallow: - assert k not in copied - else: - assert k in copied - check_copied_group( - v, - copied[k], - without_attrs=without_attrs, - shallow=shallow, - expect_props=expect_props, - ) - - # compare attrs - if without_attrs: - for k in original.attrs.keys(): - assert k not in copied.attrs - else: - assert sorted(original.attrs.items()) == sorted(copied.attrs.items()) - - -def test_copy_all(): - """ - https://github.com/zarr-developers/zarr-python/issues/269 - - copy_all used to not copy attributes as `.keys()` does not return hidden `.zattrs`. 
- - """ - original_group = zarr.group(store=MemoryStore(), overwrite=True) - original_group.attrs["info"] = "group attrs" - original_subgroup = original_group.create_group("subgroup") - original_subgroup.attrs["info"] = "sub attrs" - - destination_group = zarr.group(store=MemoryStore(), overwrite=True) - - # copy from memory to directory store - copy_all( - original_group, - destination_group, - dry_run=False, - ) - - assert "subgroup" in destination_group - assert destination_group.attrs["info"] == "group attrs" - assert destination_group.subgroup.attrs["info"] == "sub attrs" - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_copy_all_v3(): - """ - https://github.com/zarr-developers/zarr-python/issues/269 - - copy_all used to not copy attributes as `.keys()` - - """ - original_group = zarr.group(store=MemoryStoreV3(), path="group1", overwrite=True) - original_group.create_group("subgroup") - - destination_group = zarr.group(store=MemoryStoreV3(), path="group2", overwrite=True) - - # copy from memory to directory store - copy_all( - original_group, - destination_group, - dry_run=False, - ) - assert "subgroup" in destination_group - - -class TestCopy: - @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) - def source(self, request, tmpdir): - def prep_source(source): - foo = source.create_group("foo") - foo.attrs["experiment"] = "weird science" - baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) - baz.attrs["units"] = "metres" - if request.param: - extra_kws = dict( - compression="gzip", - compression_opts=3, - fillvalue=84, - shuffle=True, - fletcher32=True, - ) - else: - extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) - source.create_dataset( - "spam", - data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), - dtype="i2", - **extra_kws, - ) - return source - - if request.param: - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("source.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield prep_source(h5f) - else: - yield prep_source(group()) - - @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) - def dest(self, request, tmpdir): - if request.param: - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("dest.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield h5f - else: - yield group() - - def test_copy_array(self, source, dest): - # copy array with default options - copy(source["foo/bar/baz"], dest) - check_copied_array(source["foo/bar/baz"], dest["baz"]) - copy(source["spam"], dest) - check_copied_array(source["spam"], dest["spam"]) - - def test_copy_bad_dest(self, source, dest): - # try to copy to an array, dest must be a group - dest = dest.create_dataset("eggs", shape=(100,)) - with pytest.raises(ValueError): - copy(source["foo/bar/baz"], dest) - - def test_copy_array_name(self, source, dest): - # copy array with name - copy(source["foo/bar/baz"], dest, name="qux") - assert "baz" not in dest - check_copied_array(source["foo/bar/baz"], dest["qux"]) - - def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith("h5py.") - - # copy array, provide creation options - compressor = Zlib(9) - create_kws = dict(chunks=(10,)) - if dest_h5py: - create_kws.update( - compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 - ) - else: - create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) - copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) - 
check_copied_array( - source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws - ) - - def test_copy_array_exists_array(self, source, dest): - # copy array, dest array in the way - dest.create_dataset("baz", shape=(10,)) - - # raise - with pytest.raises(CopyError): - # should raise by default - copy(source["foo/bar/baz"], dest) - assert (10,) == dest["baz"].shape - with pytest.raises(CopyError): - copy(source["foo/bar/baz"], dest, if_exists="raise") - assert (10,) == dest["baz"].shape - - # skip - copy(source["foo/bar/baz"], dest, if_exists="skip") - assert (10,) == dest["baz"].shape - - # replace - copy(source["foo/bar/baz"], dest, if_exists="replace") - check_copied_array(source["foo/bar/baz"], dest["baz"]) - - # invalid option - with pytest.raises(ValueError): - copy(source["foo/bar/baz"], dest, if_exists="foobar") - - def test_copy_array_exists_group(self, source, dest): - # copy array, dest group in the way - dest.create_group("baz") - - # raise - with pytest.raises(CopyError): - copy(source["foo/bar/baz"], dest) - assert not hasattr(dest["baz"], "shape") - with pytest.raises(CopyError): - copy(source["foo/bar/baz"], dest, if_exists="raise") - assert not hasattr(dest["baz"], "shape") - - # skip - copy(source["foo/bar/baz"], dest, if_exists="skip") - assert not hasattr(dest["baz"], "shape") - - # replace - copy(source["foo/bar/baz"], dest, if_exists="replace") - check_copied_array(source["foo/bar/baz"], dest["baz"]) - - def test_copy_array_skip_initialized(self, source, dest): - dest_h5py = dest.__module__.startswith("h5py.") - - dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") - assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) - - if dest_h5py: - with pytest.raises(ValueError): - # not available with copy to h5py - copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") - - else: - # copy array, dest array exists but not yet initialized - copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") - check_copied_array(source["foo/bar/baz"], dest["baz"]) - - # copy array, dest array exists and initialized, will be skipped - dest["baz"][:] = np.arange(100, 200) - copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") - assert_array_equal(np.arange(100, 200), dest["baz"][:]) - assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) - - def test_copy_group(self, source, dest): - # copy group, default options - copy(source["foo"], dest) - check_copied_group(source["foo"], dest["foo"]) - - def test_copy_group_no_name(self, source, dest): - with pytest.raises(TypeError): - # need a name if copy root - copy(source, dest) - - copy(source, dest, name="root") - check_copied_group(source, dest["root"]) - - def test_copy_group_options(self, source, dest): - # copy group, non-default options - copy(source["foo"], dest, name="qux", without_attrs=True) - assert "foo" not in dest - check_copied_group(source["foo"], dest["qux"], without_attrs=True) - - def test_copy_group_shallow(self, source, dest): - # copy group, shallow - copy(source, dest, name="eggs", shallow=True) - check_copied_group(source, dest["eggs"], shallow=True) - - def test_copy_group_exists_group(self, source, dest): - # copy group, dest groups exist - dest.create_group("foo/bar") - copy(source["foo"], dest) - check_copied_group(source["foo"], dest["foo"]) - - def test_copy_group_exists_array(self, source, dest): - # copy group, dest array in the way - dest.create_dataset("foo/bar", shape=(10,)) - - # raise - with pytest.raises(CopyError): - 
copy(source["foo"], dest) - assert dest["foo/bar"].shape == (10,) - with pytest.raises(CopyError): - copy(source["foo"], dest, if_exists="raise") - assert dest["foo/bar"].shape == (10,) - - # skip - copy(source["foo"], dest, if_exists="skip") - assert dest["foo/bar"].shape == (10,) - - # replace - copy(source["foo"], dest, if_exists="replace") - check_copied_group(source["foo"], dest["foo"]) - - def test_copy_group_dry_run(self, source, dest): - # dry run, empty destination - n_copied, n_skipped, n_bytes_copied = copy( - source["foo"], dest, dry_run=True, return_stats=True - ) - assert 0 == len(dest) - assert 3 == n_copied - assert 0 == n_skipped - assert 0 == n_bytes_copied - - # dry run, array exists in destination - baz = np.arange(100, 200) - dest.create_dataset("foo/bar/baz", data=baz) - assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:]) - assert 1 == len(dest) - - # raise - with pytest.raises(CopyError): - copy(source["foo"], dest, dry_run=True) - assert 1 == len(dest) - - # skip - n_copied, n_skipped, n_bytes_copied = copy( - source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True - ) - assert 1 == len(dest) - assert 2 == n_copied - assert 1 == n_skipped - assert 0 == n_bytes_copied - assert_array_equal(baz, dest["foo/bar/baz"]) - - # replace - n_copied, n_skipped, n_bytes_copied = copy( - source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True - ) - assert 1 == len(dest) - assert 3 == n_copied - assert 0 == n_skipped - assert 0 == n_bytes_copied - assert_array_equal(baz, dest["foo/bar/baz"]) - - def test_logging(self, source, dest, tmpdir): - # callable log - copy(source["foo"], dest, dry_run=True, log=print) - - # file name - fn = str(tmpdir.join("log_name")) - copy(source["foo"], dest, dry_run=True, log=fn) - - # file - with tmpdir.join("log_file").open(mode="w") as f: - copy(source["foo"], dest, dry_run=True, log=f) - - # bad option - with pytest.raises(TypeError): - copy(source["foo"], dest, dry_run=True, log=True) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestCopyV3(TestCopy): - @pytest.fixture(params=["zarr", "hdf5"]) - def source(self, request, tmpdir): - def prep_source(source): - foo = source.create_group("foo") - foo.attrs["experiment"] = "weird science" - baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) - baz.attrs["units"] = "metres" - if request.param == "hdf5": - extra_kws = dict( - compression="gzip", - compression_opts=3, - fillvalue=84, - shuffle=True, - fletcher32=True, - ) - else: - extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) - source.create_dataset( - "spam", - data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), - dtype="i2", - **extra_kws, - ) - return source - - if request.param == "hdf5": - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("source.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield prep_source(h5f) - elif request.param == "zarr": - yield prep_source(group(path="group1", zarr_version=3)) - - # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - destinations = ["hdf5", "zarr", "zarr_kvstore", "zarr_directorystore", "zarr_sqlitestore"] - if have_fsspec: - destinations += ["zarr_fsstore"] - - @pytest.fixture(params=destinations) - def dest(self, request, tmpdir): - if request.param == "hdf5": - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("dest.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield h5f - elif request.param == "zarr": - yield 
group(path="group2", zarr_version=3) - elif request.param == "zarr_kvstore": - store = KVStoreV3(dict()) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_fsstore": - fn = tmpdir.join("dest.zr3") - store = FSStoreV3(str(fn), auto_mkdir=True) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_directorystore": - fn = tmpdir.join("dest.zr3") - store = DirectoryStoreV3(str(fn)) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_sqlitestore": - fn = tmpdir.join("dest.db") - store = SQLiteStoreV3(str(fn)) - yield group(store, path="group2", zarr_version=3) - - def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith("h5py.") - - # copy array, provide creation options - compressor = Zlib(9) - create_kws = dict(chunks=(10,)) - if dest_h5py: - create_kws.update( - compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 - ) - else: - # v3 case has no filters argument in zarr create_kws - create_kws.update(compressor=compressor, fill_value=42, order="F") - copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) - check_copied_array( - source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws - ) - - def test_copy_group_no_name(self, source, dest): - if source.__module__.startswith("h5py"): - with pytest.raises(TypeError): - copy(source, dest) - else: - # For v3, dest.name will be inferred from source.name - copy(source, dest) - check_copied_group(source, dest[source.name.lstrip("/")]) - - copy(source, dest, name="root") - check_copied_group(source, dest["root"]) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py deleted file mode 100644 index 4729dc01b6..0000000000 --- a/zarr/tests/test_core.py +++ /dev/null @@ -1,3225 +0,0 @@ -import atexit -import os -import sys -import pickle -import shutil -from typing import Any, Literal, Optional, Tuple, Union, Sequence -import unittest -from itertools import zip_longest -from tempfile import mkdtemp -import numpy as np -import packaging.version -import pytest -from numcodecs import ( - BZ2, - JSON, - LZ4, - Blosc, - Categorize, - Delta, - FixedScaleOffset, - GZip, - MsgPack, - Pickle, - VLenArray, - VLenBytes, - VLenUTF8, - Zlib, -) -from numcodecs.abc import Codec -from numcodecs.compat import ensure_bytes, ensure_ndarray -from numcodecs.tests.common import greetings -from numpy.testing import assert_array_almost_equal, assert_array_equal - -import zarr -from zarr._storage.store import ( - BaseStore, - v3_api_available, -) -from .._storage.v3_storage_transformers import ShardingStorageTransformer, v3_sharding_available -from zarr.core import Array -from zarr.errors import ArrayNotFoundError, ContainsGroupError -from zarr.meta import json_loads -from zarr.n5 import N5Store, N5FSStore, n5_keywords -from zarr.storage import ( - ABSStore, - DBMStore, - DirectoryStore, - FSStore, - KVStore, - LMDBStore, - LRUStoreCache, - NestedDirectoryStore, - SQLiteStore, - atexit_rmglob, - atexit_rmtree, - data_root, - init_array, - init_group, - meta_root, - normalize_store_arg, -) -from zarr._storage.v3 import ( - ABSStoreV3, - DBMStoreV3, - DirectoryStoreV3, - FSStoreV3, - KVStoreV3, - LMDBStoreV3, - LRUStoreCacheV3, - RmdirV3, - SQLiteStoreV3, - StoreV3, -) -from zarr.tests.test_storage_v3 import DummyStorageTransfomer -from zarr.util import buffer_size -from zarr.tests.util import ( - abs_container, - have_bsddb3, - have_fsspec, - have_lmdb, - have_sqlite3, - mktemp, 
- skip_test_env_var, -) -from zarr.types import DIMENSION_SEPARATOR - -# noinspection PyMethodMayBeStatic - - -class TestArray: - version = 2 - root = "" - path = "" - compressor = Zlib(level=1) - filters: Optional[Sequence[Codec]] = None - dimension_separator: Optional[DIMENSION_SEPARATOR] = None - cache_metadata = True - cache_attrs = True - partial_decompress: bool = False - write_empty_chunks = True - read_only = False - storage_transformers: Tuple[Any, ...] = () - - def create_store(self) -> BaseStore: - return KVStore(dict()) - - # used by child classes - def create_chunk_store(self) -> Optional[BaseStore]: - return None - - def create_storage_transformers(self, shape: Union[int, Tuple[int, ...]]) -> Tuple[Any, ...]: - return () - - def create_filters(self, dtype: Optional[str]) -> Tuple[Any, ...]: - return () - - def create_array(self, shape: Union[int, Tuple[int, ...]], **kwargs): - store = self.create_store() - chunk_store = self.create_chunk_store() - # keyword arguments for array initialization - init_array_kwargs = { - "path": kwargs.pop("path", self.path), - "compressor": kwargs.pop("compressor", self.compressor), - "chunk_store": chunk_store, - "storage_transformers": self.create_storage_transformers(shape), - "filters": kwargs.pop("filters", self.create_filters(kwargs.get("dtype"))), - } - - # keyword arguments for array instantiation - access_array_kwargs = { - "path": init_array_kwargs["path"], - "read_only": kwargs.pop("read_only", self.read_only), - "chunk_store": chunk_store, - "cache_metadata": kwargs.pop("cache_metadata", self.cache_metadata), - "cache_attrs": kwargs.pop("cache_attrs", self.cache_attrs), - "partial_decompress": kwargs.pop("partial_decompress", self.partial_decompress), - "write_empty_chunks": kwargs.pop("write_empty_chunks", self.write_empty_chunks), - } - - init_array(store, shape, **{**init_array_kwargs, **kwargs}) - - return Array(store, **access_array_kwargs) - - def test_array_init(self): - # normal initialization - store = self.create_store() - init_array(store, shape=100, chunks=10, dtype=" end - assert [] == list(z.islice(6, 5)) - - z.store.close() - - def test_iter(self): - params = ( - ((1,), (1,)), - ((2,), (1,)), - ((1,), (2,)), - ((3,), (3,)), - ((1000,), (100,)), - ((100,), (1000,)), - ((1, 100), (1, 1)), - ((1, 0), (1, 1)), - ((0, 1), (1, 1)), - ((0, 1), (2, 1)), - ((100, 1), (3, 1)), - ((100, 100), (10, 10)), - ((10, 10, 10), (3, 3, 3)), - ) - for shape, chunks in params: - z = self.create_array(shape=shape, chunks=chunks, dtype=int) - a = np.arange(np.prod(shape)).reshape(shape) - z[:] = a - for expect, actual in zip_longest(a, z): - assert_array_equal(expect, actual) - z.store.close() - - def test_islice(self): - params = ( - ((1,), (1,), 0, 1), - ((2,), (1,), 0, 1), - ((1,), (2,), 0, 1), - ((3,), (3,), 1, 2), - ((1000,), (100,), 150, 1050), - ((100,), (1000,), 25, 75), - ((1, 100), (1, 1), 0, 1), - ((100, 1), (3, 1), 56, 100), - ((100, 100), (10, 10), 13, 99), - ((10, 10, 10), (3, 3, 3), 2, 4), - ) - for shape, chunks, start, end in params: - z = self.create_array(shape=shape, chunks=chunks, dtype=int) - a = np.arange(np.prod(shape)).reshape(shape) - z[:] = a - end_array = min(end, a.shape[0]) - for expect, actual in zip_longest(a[start:end_array], z.islice(start, end)): - assert_array_equal(expect, actual) - if hasattr(z.store, "close"): - z.store.close() - - def test_compressors(self): - compressors = [None, BZ2(), Blosc(), LZ4(), Zlib(), GZip()] - if LZMA: - compressors.append(LZMA()) - for compressor in compressors: - a = 
self.create_array(shape=1000, chunks=100, compressor=compressor) - a[0:100] = 1 - assert np.all(a[0:100] == 1) - a[:] = 1 - assert np.all(a[:] == 1) - a.store.close() - - def test_endian(self): - dtype = np.dtype("float32") - a1 = self.create_array(shape=1000, chunks=100, dtype=dtype.newbyteorder("<")) - a1[:] = 1 - x1 = a1[:] - a2 = self.create_array(shape=1000, chunks=100, dtype=dtype.newbyteorder(">")) - a2[:] = 1 - x2 = a2[:] - assert_array_equal(x1, x2) - a1.store.close() - a2.store.close() - - def test_attributes(self): - a = self.create_array(shape=10, chunks=10, dtype="i8") - a.attrs["foo"] = "bar" - assert a.attrs.key in a.store - attrs = json_loads(a.store[a.attrs.key]) - if self.version > 2: - # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs["attributes"] - assert "foo" in attrs and attrs["foo"] == "bar" - - a.attrs["bar"] = "foo" - assert a.attrs.key in a.store - attrs = json_loads(a.store[a.attrs.key]) - if self.version > 2: - # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs["attributes"] - assert "foo" in attrs and attrs["foo"] == "bar" - assert "bar" in attrs and attrs["bar"] == "foo" - a.store.close() - - def test_structured_with_object(self): - a = self.create_array( - fill_value=(0.0, None), - shape=10, - chunks=10, - dtype=[("x", float), ("y", object)], - object_codec=Pickle(), - ) - assert tuple(a[0]) == (0.0, None) - - -class TestArrayWithPath(TestArray): - path = "foo/bar" - compressor = Blosc() - - def test_nchunks_initialized(self): - pass - - def expected(self): - return [ - "f710da18d45d38d4aaf2afd7fb822fdd73d02957", - "1437428e69754b1e1a38bd7fc9e43669577620db", - "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", - "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af", - ] - - def test_nbytes_stored(self): - # MemoryStore as store - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum( - buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") - ) - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum( - buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") - ) - assert expect_nbytes_stored == z.nbytes_stored - - # mess with store - z.store[z._key_prefix + "foo"] = list(range(10)) - assert -1 == z.nbytes_stored - - -class TestArrayWithChunkStore(TestArray): - compressor = Blosc() - - def create_chunk_store(self): - return KVStore(dict()) - - def expected(self): - return [ - "f710da18d45d38d4aaf2afd7fb822fdd73d02957", - "1437428e69754b1e1a38bd7fc9e43669577620db", - "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", - "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af", - ] - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) - assert expect_nbytes_stored == z.nbytes_stored - - # mess with store - z.chunk_store[z._key_prefix + "foo"] = list(range(10)) - assert -1 == z.nbytes_stored - - -class TestArrayWithDirectoryStore(TestArray): - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = DirectoryStore(path) - return store - - def 
test_nbytes_stored(self): - # dict as store - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - assert expect_nbytes_stored == z.nbytes_stored - - -def test_array_init_from_dict(): - # initialization via non-Store MutableMapping - store = dict() - init_array(store, shape=100, chunks=10, dtype=" Tuple[Any, ...]: - return ( - Delta(dtype=dtype), - FixedScaleOffset(dtype=dtype, scale=1, offset=0), - ) - - def expected(self): - return [ - "b80367c5599d47110d42bd8886240c2f46620dba", - "95a7b2471225e73199c9716d21e8d3dd6e5f6f2a", - "7300f1eb130cff5891630038fd99c28ef23d3a01", - "c649ad229bc5720258b934ea958570c2f354c2eb", - "62fc9236d78af18a5ec26c12eea1d33bce52501e", - ] - - def test_astype_no_filters(self): - shape = (100,) - dtype = np.dtype(np.int8) - astype = np.dtype(np.float32) - - store = KVStore(dict()) - init_array(store, shape=shape, chunks=10, dtype=dtype) - - data = np.arange(np.prod(shape), dtype=dtype).reshape(shape) - - z1 = Array(store) - z1[...] = data - z2 = z1.astype(astype) - - expected = data.astype(astype) - assert_array_equal(expected, z2) - assert z2.read_only - - def test_astype(self): - shape = (100,) - chunks = (10,) - - dtype = np.dtype(np.int8) - astype = np.dtype(np.float32) - - data = np.arange(np.prod(shape), dtype=dtype).reshape(shape) - - z1 = self.create_array(shape=shape, chunks=chunks, dtype=dtype) - z1[...] = data - z2 = z1.astype(astype) - - expected = data.astype(astype) - assert_array_equal(expected, z2) - - def test_array_dtype_shape(self): - # skip this one, cannot do delta on unstructured array - pass - - def test_structured_array(self): - # skip this one, cannot do delta on structured array - pass - - def test_structured_array_subshapes(self): - # skip this one, cannot do delta on structured array - pass - - def test_structured_array_nested(self): - # skip this one, cannot do delta on structured array - pass - - def test_dtypes(self): - # skip this one, delta messes up floats - pass - - def test_object_arrays(self): - # skip this one, cannot use delta with objects - pass - - def test_object_arrays_vlen_text(self): - # skip this one, cannot use delta with objects - pass - - def test_object_arrays_vlen_bytes(self): - # skip this one, cannot use delta with objects - pass - - def test_object_arrays_vlen_array(self): - # skip this one, cannot use delta with objects - pass - - def test_object_arrays_danger(self): - # skip this one, cannot use delta with objects - pass - - def test_structured_array_contain_object(self): - # skip this one, cannot use delta on structured array - pass - - -# custom store, does not support getsize() -class CustomMapping: - def __init__(self): - self.inner = KVStore(dict()) - - def __iter__(self): - return iter(self.keys()) - - def keys(self): - return self.inner.keys() - - def values(self): - return self.inner.values() - - def get(self, item, default=None): - try: - return self.inner[item] - except KeyError: - return default - - def __getitem__(self, item): - return self.inner[item] - - def __setitem__(self, item, value): - self.inner[item] = ensure_bytes(value) - - def __delitem__(self, key): - del self.inner[key] - - def __contains__(self, item): - return item in self.inner - - def close(self): - return self.inner.close() - - -class TestArrayWithCustomMapping(TestArray): - def create_store(self): - return CustomMapping() - - def 
test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - assert 245 == z.nbytes_stored - z[:] = 42 - assert 515 == z.nbytes_stored - - -class TestArrayNoCache(TestArray): - def test_cache_metadata(self): - a1 = self.create_array(shape=100, chunks=10, dtype="i1", cache_metadata=False) - path = None if self.version == 2 else a1.path - a2 = Array(a1.store, path=path, cache_metadata=True) - assert a1.shape == a2.shape - assert a1.size == a2.size - assert a1.nbytes == a2.nbytes - assert a1.nchunks == a2.nchunks - - # a1 is not caching so *will* see updates made via other objects - a2.resize(200) - assert (200,) == a2.shape - assert 200 == a2.size - assert 200 == a2.nbytes - assert 20 == a2.nchunks - assert a1.shape == a2.shape - assert a1.size == a2.size - assert a1.nbytes == a2.nbytes - assert a1.nchunks == a2.nchunks - - a2.append(np.zeros(100)) - assert (300,) == a2.shape - assert 300 == a2.size - assert 300 == a2.nbytes - assert 30 == a2.nchunks - assert a1.shape == a2.shape - assert a1.size == a2.size - assert a1.nbytes == a2.nbytes - assert a1.nchunks == a2.nchunks - - # a2 is caching so *will not* see updates made via other objects - a1.resize(400) - assert (400,) == a1.shape - assert 400 == a1.size - assert 400 == a1.nbytes - assert 40 == a1.nchunks - assert (300,) == a2.shape - assert 300 == a2.size - assert 300 == a2.nbytes - assert 30 == a2.nchunks - - def test_cache_attrs(self): - a1 = self.create_array(shape=100, chunks=10, dtype="i1", cache_attrs=False) - path = None if self.version == 2 else "arr1" - a2 = Array(a1.store, path=path, cache_attrs=True) - assert a1.attrs.asdict() == a2.attrs.asdict() - - # a1 is not caching so *will* see updates made via other objects - a2.attrs["foo"] = "xxx" - a2.attrs["bar"] = 42 - assert a1.attrs.asdict() == a2.attrs.asdict() - - # a2 is caching so *will not* see updates made via other objects - a1.attrs["foo"] = "yyy" - assert "yyy" == a1.attrs["foo"] - assert "xxx" == a2.attrs["foo"] - - def test_object_arrays_danger(self): - # skip this one as it only works if metadata are cached - pass - - -class TestArrayWithStoreCache(TestArray): - def create_store(self): - return LRUStoreCache(dict(), max_size=None) - - def test_store_has_bytes_values(self): - # skip as the cache has no control over how the store provides values - pass - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestArrayWithFSStore(TestArray): - compressor = Blosc() - dimension_separator: Literal[".", "/"] = "." - - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStore( - path, - key_separator=key_separator, - auto_mkdir=True, - check=True, - create=True, - missing_exceptions=None, - ) - return store - - def expected(self): - return [ - "ab753fc81df0878589535ca9bad2816ba88d91bc", - "c16261446f9436b1e9f962e57ce3e8f6074abe8a", - "c2ef3b2fb2bc9dcace99cd6dad1a7b66cc1ea058", - "6e52f95ac15b164a8e96843a230fcee0e610729b", - "091fa99bc60706095c9ce30b56ce2503e0223f56", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestArrayWithFSStoreFromFilesystem(TestArray): - compressor = Blosc() - dimension_separator = "." 
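To make the caching semantics tested in `TestArrayNoCache` above concrete, here is a rough sketch, with an arbitrary store type and shape, of two handles on the same array where the second has metadata caching disabled:

    import zarr

    store = zarr.KVStore(dict())
    a = zarr.create(shape=(100,), chunks=(10,), dtype="i1", store=store)
    b = zarr.Array(store, cache_metadata=False)   # second view of the same array

    a.resize(200)
    assert a.shape == (200,)
    assert b.shape == (200,)   # b re-reads metadata on access, so it sees the resize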
- - def create_store(self): - from fsspec.implementations.local import LocalFileSystem - - fs = LocalFileSystem(auto_mkdir=True) - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStore( - path, - fs=fs, - key_separator=key_separator, - check=True, - create=True, - missing_exceptions=None, - ) - return store - - def expected(self): - return [ - "ab753fc81df0878589535ca9bad2816ba88d91bc", - "c16261446f9436b1e9f962e57ce3e8f6074abe8a", - "c2ef3b2fb2bc9dcace99cd6dad1a7b66cc1ea058", - "6e52f95ac15b164a8e96843a230fcee0e610729b", - "091fa99bc60706095c9ce30b56ce2503e0223f56", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestArrayWithFSStorePartialRead(TestArray): - compressor = Blosc(blocksize=256) - partial_decompress = True - - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = FSStore(path) - return store - - def expected(self): - return [ - "dd7577d645c38767cf6f6d1ef8fd64002883a014", - "aa0de9892cf1ed3cda529efbf3233720b84489b7", - "e6191c44cf958576c29c41cef0f55b028a4dbdff", - "88adeeabb819feecccadf50152293dbb42f9107e", - "1426e084427f9920e29c9ec81b663d1005849455", - ] - - def test_non_cont(self): - z = self.create_array(shape=(500, 500, 500), chunks=(50, 50, 50), dtype=" BaseStore: - path = mkdtemp() - atexit.register(shutil.rmtree, path) - return DirectoryStoreV3(path) - - def test_nbytes_stored(self): - # dict as store - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - -@skip_test_env_var("ZARR_TEST_ABS") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithABSStoreV3(TestArrayV3): - def create_store(self) -> ABSStoreV3: - client = abs_container() - store = ABSStoreV3(client=client) - store.rmdir() - return store - - -# TODO: TestArrayWithN5StoreV3 -# class TestArrayWithN5StoreV3(TestArrayWithDirectoryStoreV3): - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithDBMStoreV3(TestArrayV3): - def create_store(self) -> DBMStoreV3: - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - store = DBMStoreV3(path, flag="n") - return store - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(have_bsddb3 is False, reason="needs bsddb3") -class TestArrayWithDBMStoreV3BerkeleyDB(TestArrayV3): - def create_store(self) -> DBMStoreV3: - import bsddb3 - - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) - return store - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(have_lmdb is False, reason="needs lmdb") -class TestArrayWithLMDBStoreV3(TestArrayV3): - lmdb_buffers = True - - def create_store(self) -> LMDBStoreV3: - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - store = LMDBStoreV3(path, buffers=self.lmdb_buffers) - return store - - def test_store_has_bytes_values(self): - pass # returns values as memoryviews/buffers instead of bytes - - def test_nbytes_stored(self): - pass # not 
implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithLMDBStoreV3NoBuffers(TestArrayWithLMDBStoreV3): - lmdb_buffers = False - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(have_sqlite3 is False, reason="needs sqlite3") -class TestArrayWithSQLiteStoreV3(TestArrayV3): - def create_store(self): - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStoreV3(path) - return store - - def test_nbytes_stored(self): - pass # not implemented - - -# skipped adding V3 equivalents for compressors (no change in v3): -# TestArrayWithNoCompressor -# TestArrayWithBZ2Compressor -# TestArrayWithBloscCompressor -# TestArrayWithLZMACompressor - -# skipped test with filters (v3 protocol removed filters) -# TestArrayWithFilters - - -# custom store, does not support getsize() -# Note: this custom mapping doesn't actually have all methods in the -# v3 spec (e.g. erase), but they aren't needed here. - - -class CustomMappingV3(RmdirV3, StoreV3): - def __init__(self): - self.inner = KVStoreV3(dict()) - - def __iter__(self): - return iter(self.keys()) - - def __len__(self): - return len(self.inner) - - def keys(self): - return self.inner.keys() - - def values(self): - return self.inner.values() - - def get(self, item, default=None): - try: - return self.inner[item] - except KeyError: - return default - - def __getitem__(self, item): - return self.inner[item] - - def __setitem__(self, item, value): - self.inner[item] = ensure_bytes(value) - - def __delitem__(self, key): - del self.inner[key] - - def __contains__(self, item): - return item in self.inner - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithCustomMappingV3(TestArrayV3): - def create_store(self): - store = CustomMappingV3() - return store - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - def test_len(self): - # dict as store - z = self.create_array(shape=1000, chunks=100) - assert len(z._store) == 2 - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayNoCacheV3(TestArrayWithPathV3): - def create_store(self): - store = KVStoreV3(dict()) - return store - - def test_object_arrays_danger(self): - # skip this one as it only works if metadata are cached - pass - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithStoreCacheV3(TestArrayV3): - def create_store(self): - store = LRUStoreCacheV3(dict(), max_size=None) - return store - - def test_store_has_bytes_values(self): - # skip as the cache has no control over how the store provides values - pass - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3(TestArrayV3): - compressor = Blosc() - - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStoreV3( - path, - key_separator=key_separator, - auto_mkdir=True, - create=True, - check=True, - missing_exceptions=None, - ) - return store - - def 
expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3FromFilesystem(TestArrayWithFSStoreV3): - def create_store(self): - from fsspec.implementations.local import LocalFileSystem - - fs = LocalFileSystem(auto_mkdir=True) - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStoreV3( - path, - fs=fs, - key_separator=key_separator, - create=True, - check=True, - missing_exceptions=None, - ) - return store - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3PartialRead(TestArrayWithFSStoreV3): - partial_decompress = True - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithFSStoreV3PartialReadUncompressedSharded(TestArrayWithFSStoreV3): - partial_decompress = True - compressor = None - - def create_storage_transformers(self, shape) -> Tuple[Any]: - num_dims = 1 if isinstance(shape, int) else len(shape) - sharding_transformer = ShardingStorageTransformer( - "indexed", chunks_per_shard=(2,) * num_dims - ) - return (sharding_transformer,) - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - def test_supports_efficient_get_set_partial_values(self): - z = self.create_array(shape=100, chunks=10) - assert z.chunk_store.supports_efficient_get_partial_values - assert not z.chunk_store.supports_efficient_set_partial_values() - - def expected(self): - return [ - "90109fc2a4e17efbcb447003ea1c08828b91f71e", - "2b73519f7260dba3ddce0d2b70041888856fec6b", - "bca5798be2ed71d444f3045b05432d937682b7dd", - "9ff1084501e28520e577662a6e3073f1116c76a2", - "882a97cad42417f90f111d0cb916a21579650467", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3Nested(TestArrayWithFSStoreV3): - dimension_separator = "/" - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - 
"e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3NestedPartialRead(TestArrayWithFSStoreV3): - dimension_separator = "/" - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithStorageTransformersV3(TestArrayWithChunkStoreV3): - def create_storage_transformers(self, shape) -> Tuple[Any]: - return ( - DummyStorageTransfomer("dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT), - ) - - def expected(self): - return [ - "3fb9a4f8233b09ad02067b6b7fc9fd5caa405c7d", - "89c8eb364beb84919fc9153d2c1ed2696274ec18", - "73307055c3aec095dd1232c38d793ef82a06bd97", - "6152c09255a5efa43b1a115546e35affa00c138c", - "2f8802fc391f67f713302e84fad4fd8f1366d6c2", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithShardingStorageTransformerV3(TestArrayV3): - compressor = None - - def create_storage_transformers(self, shape) -> Tuple[Any]: - num_dims = 1 if isinstance(shape, int) else len(shape) - return (ShardingStorageTransformer("indexed", chunks_per_shard=(2,) * num_dims),) - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - # mess with store - z.store[data_root + z._key_prefix + "foo"] = list(range(10)) - assert -1 == z.nbytes_stored - - def test_keys_inner_store(self): - z = self.create_array(shape=1000, chunks=100) - assert z.chunk_store.keys() == z._store.keys() - meta_keys = set(z.store.keys()) - z[:] = 42 - assert len(z.chunk_store.keys() - meta_keys) == 10 - # inner store should have half the data keys, - # since chunks_per_shard is 2: - assert len(z._store.keys() - meta_keys) == 5 - - def test_supports_efficient_get_set_partial_values(self): - z = self.create_array(shape=100, chunks=10) - assert not z.chunk_store.supports_efficient_get_partial_values - assert not z.chunk_store.supports_efficient_set_partial_values() - - def expected(self): - return [ - "90109fc2a4e17efbcb447003ea1c08828b91f71e", - "2b73519f7260dba3ddce0d2b70041888856fec6b", - "bca5798be2ed71d444f3045b05432d937682b7dd", - "9ff1084501e28520e577662a6e3073f1116c76a2", - "882a97cad42417f90f111d0cb916a21579650467", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_array_mismatched_store_versions(): - store_v3 = KVStoreV3(dict()) - store_v2 = KVStore(dict()) - - # separate chunk store - chunk_store_v2 = KVStore(dict()) - chunk_store_v3 = KVStoreV3(dict()) - - init_kwargs = dict(shape=100, chunks=10, dtype="""" - - data = np.arange(25).reshape((5, 5)) - ds = zarr.create( - shape=data.shape, - chunks=(5, 5), - dtype=data.dtype, - compressor=(None), - store=FSStore(url=str(tmpdir), mode="a"), - order="F", - ) - - ds[:] = data - - ds_reopened = zarr.open_array(store=FSStore(url=str(tmpdir), 
mode="r")) - - written_data = ds_reopened[:] - assert_array_equal(data, written_data) - - -def test_scalar_indexing(): - store = zarr.KVStore({}) - - store["a"] = zarr.create((3,), chunks=(1,), store=store) - store["a"][:] = [1, 2, 3] - - assert store["a"][1] == np.array(2.0) - assert store["a"][(1,)] == np.array(2.0) - - store["a"][slice(1)] = [-1] - assert store["a"][0] == np.array(-1) - - store["a"][0] = -2 - assert store["a"][0] == np.array(-2) - - store["a"][slice(1)] = (-3,) - assert store["a"][0] == np.array(-3) - - -def test_object_array_indexing(): - # regression test for #1874 - from numcodecs import MsgPack - - root = zarr.group() - arr = root.create_dataset( - name="my_dataset", - shape=0, - dtype=object, - object_codec=MsgPack(), - ) - new_items = [ - ["A", 1], - ["B", 2, "hello"], - ] - arr_add = np.empty(len(new_items), dtype=object) - arr_add[:] = new_items - arr.append(arr_add) - - # heterogeneous elements - elem = ["C", 3] - arr[0] = elem - assert arr[0] == elem - - # homogeneous elements - elem = [1, 3] - arr[1] = elem - assert arr[1] == elem - - -@pytest.mark.parametrize("shape", ((1, 1, 1), (5, 5, 1), (1, 5, 5))) -def test_scalar_orthogonal_indexing(shape): - # regression test for https://github.com/zarr-developers/zarr-python/issues/1931 - store = zarr.MemoryStore({}) - data = np.random.randint(0, 255, shape) - arr = zarr.zeros( - shape=shape, chunks=shape[:-1] + (1,), compressor=None, store=store, dtype="u1" - ) - arr[:, :, :] = data - store.close() - - zf = zarr.open(store, "r") - assert_array_equal(zf[0, :, :], data[0, :, :]) - assert_array_equal(zf[:, 0, :], data[:, 0, :]) - assert_array_equal(zf[:, :, 0], data[:, :, 0]) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py deleted file mode 100644 index 8e586abfff..0000000000 --- a/zarr/tests/test_creation.py +++ /dev/null @@ -1,774 +0,0 @@ -import atexit -import os.path -import shutil -import warnings - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -from zarr._storage.store import DEFAULT_ZARR_VERSION -from zarr.codecs import Zlib -from zarr.core import Array -from zarr.creation import ( - array, - create, - empty, - empty_like, - full, - full_like, - ones, - ones_like, - open_array, - open_like, - zeros, - zeros_like, -) -from zarr.hierarchy import open_group -from zarr.n5 import N5Store -from zarr.storage import DirectoryStore, KVStore -from zarr._storage.store import v3_api_available -from zarr._storage.v3 import DirectoryStoreV3, KVStoreV3 -from zarr.sync import ThreadSynchronizer -from zarr.tests.test_storage_v3 import DummyStorageTransfomer -from zarr.tests.util import mktemp, have_fsspec - - -_VERSIONS = (None, 2, 3) if v3_api_available else (None, 2) -_VERSIONS2 = (2, 3) if v3_api_available else (2,) - - -# something bcolz-like -class MockBcolzArray: - def __init__(self, data, chunklen): - self.data = data - self.chunklen = chunklen - - def __getattr__(self, item): - return getattr(self.data, item) - - def __getitem__(self, item): - return self.data[item] - - -# something h5py-like -class MockH5pyDataset: - def __init__(self, data, chunks): - self.data = data - self.chunks = chunks - - def __getattr__(self, item): - return getattr(self.data, item) - - def __getitem__(self, item): - return self.data[item] - - -def _init_creation_kwargs(zarr_version, at_root=True): - kwargs = {"zarr_version": zarr_version} - if not at_root: - kwargs["path"] = "array" - return kwargs - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) 
-@pytest.mark.parametrize("at_root", [False, True]) -def test_array(zarr_version, at_root): - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - kwargs = _init_creation_kwargs(zarr_version, at_root) - - # with numpy array - a = np.arange(100) - z = array(a, chunks=10, **kwargs) - assert a.shape == z.shape - assert a.dtype == z.dtype - assert z._store._store_version == expected_zarr_version - assert_array_equal(a, z[:]) - - # with array-like - a = list(range(100)) - z = array(a, chunks=10, **kwargs) - assert (100,) == z.shape - assert np.asarray(a).dtype == z.dtype - assert_array_equal(np.asarray(a), z[:]) - - # with another zarr array - z2 = array(z, **kwargs) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert_array_equal(z[:], z2[:]) - - # with chunky array-likes - - b = np.arange(1000).reshape(100, 10) - c = MockBcolzArray(b, 10) - z3 = array(c, **kwargs) - assert c.shape == z3.shape - assert (10, 10) == z3.chunks - - b = np.arange(1000).reshape(100, 10) - c = MockH5pyDataset(b, chunks=(10, 2)) - z4 = array(c, **kwargs) - assert c.shape == z4.shape - assert (10, 2) == z4.chunks - - c = MockH5pyDataset(b, chunks=None) - z5 = array(c, **kwargs) - assert c.shape == z5.shape - assert isinstance(z5.chunks, tuple) - - # with dtype=None - a = np.arange(100, dtype="i4") - z = array(a, dtype=None, **kwargs) - assert_array_equal(a[:], z[:]) - assert a.dtype == z.dtype - - # with dtype=something else - a = np.arange(100, dtype="i4") - z = array(a, dtype="i8", **kwargs) - assert_array_equal(a[:], z[:]) - assert np.dtype("i8") == z.dtype - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_empty(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - z = empty(100, chunks=10, **kwargs) - assert (100,) == z.shape - assert (10,) == z.chunks - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_zeros(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - z = zeros(100, chunks=10, **kwargs) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.zeros(100), z[:]) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_ones(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - z = ones(100, chunks=10, **kwargs) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.ones(100), z[:]) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_full(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - z = full(100, chunks=10, fill_value=42, dtype="i4", **kwargs) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42, dtype="i4"), z[:]) - - # nan - z = full(100, chunks=10, fill_value=np.nan, dtype="f8", **kwargs) - assert np.all(np.isnan(z[:])) - - -@pytest.mark.parametrize("zarr_version", [None, 2]) # TODO -def test_full_additional_dtypes(zarr_version): - """Test additional types that aren't part of the base v3 spec.""" - kwargs = _init_creation_kwargs(zarr_version) - # NaT - z = full(100, chunks=10, fill_value="NaT", dtype="M8[s]", **kwargs) - assert np.all(np.isnat(z[:])) - z = full(100, chunks=10, fill_value="NaT", dtype="m8[s]", **kwargs) - assert np.all(np.isnat(z[:])) - - # 
byte string dtype - v = b"xxx" - z = full(100, chunks=10, fill_value=v, dtype="S3", **kwargs) - assert v == z[0] - a = z[...] - assert z.dtype == a.dtype - assert v == a[0] - assert np.all(a == v) - - # unicode string dtype - v = "xxx" - z = full(100, chunks=10, fill_value=v, dtype="U3", **kwargs) - assert v == z[0] - a = z[...] - assert z.dtype == a.dtype - assert v == a[0] - assert np.all(a == v) - - # bytes fill value / unicode dtype - v = b"xxx" - with pytest.raises(ValueError): - full(100, chunks=10, fill_value=v, dtype="U3") - - -@pytest.mark.parametrize("dimension_separator", [".", "/", None]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_open_array(zarr_version, at_root, dimension_separator): - store = "data/array.zarr" - kwargs = _init_creation_kwargs(zarr_version, at_root) - - # mode == 'w' - z = open_array( - store, mode="w", shape=100, chunks=10, dimension_separator=dimension_separator, **kwargs - ) - z[:] = 42 - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - - if dimension_separator is None: - assert z._dimension_separator == "/" if zarr_version == 3 else "." - else: - assert z._dimension_separator == dimension_separator - - # mode in 'r', 'r+' - group_kwargs = kwargs.copy() - if zarr_version == 3: - group_kwargs["path"] = "group" - open_group("data/group.zarr", mode="w", **group_kwargs) - for mode in "r", "r+": - with pytest.raises(ValueError): - open_array("doesnotexist", mode=mode) - with pytest.raises(ValueError): - open_array("data/group.zarr", mode=mode) - z = open_array(store, mode="r", **kwargs) - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - with pytest.raises(PermissionError): - z[:] = 43 - z = open_array(store, mode="r+", **kwargs) - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - z[:] = 43 - assert_array_equal(np.full(100, fill_value=43), z[:]) - - # mode == 'a' - shutil.rmtree(store) - z = open_array(store, mode="a", shape=100, chunks=10, **kwargs) - z[:] = 42 - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - - expected_error = TypeError if zarr_version == 3 else ValueError - # v3 path does not conflict, but will raise TypeError without shape kwarg - with pytest.raises(expected_error): - # array would end up at data/group.zarr/meta/root/array.array.json - open_array("data/group.zarr", mode="a", **kwargs) - - # mode in 'w-', 'x' - for mode in "w-", "x": - shutil.rmtree(store) - z = open_array(store, mode=mode, shape=100, chunks=10, **kwargs) - z[:] = 42 - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, 
DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - with pytest.raises(ValueError): - open_array(store, mode=mode, **kwargs) - expected_error = TypeError if zarr_version == 3 else ValueError - # v3 path does not conflict, but will raise TypeError without shape kwarg - with pytest.raises(expected_error): - open_array("data/group.zarr", mode=mode, **kwargs) - - # with synchronizer - z = open_array(store, synchronizer=ThreadSynchronizer(), **kwargs) - assert isinstance(z, Array) - - # with path - kwargs_no_path = kwargs.copy() - kwargs_no_path.pop("path", None) - z = open_array(store, shape=100, path="foo/bar", mode="w", **kwargs_no_path) - assert isinstance(z, Array) - assert "foo/bar" == z.path - - # with chunk store - meta_store = "data/meta.zarr" - chunk_store = "data/chunks.zarr" - z = open_array(store=meta_store, chunk_store=chunk_store, shape=11, mode="w", **kwargs) - z[:] = 42 - assert os.path.abspath(meta_store) == z.store.path - assert os.path.abspath(chunk_store) == z.chunk_store.path - - -def test_open_array_none(): - # open with both store and zarr_version = None - z = open_array(mode="w", shape=100, chunks=10) - assert isinstance(z, Array) - assert z._version == 2 - - -@pytest.mark.parametrize("dimension_separator", [".", "/", None]) -@pytest.mark.parametrize("zarr_version", _VERSIONS2) -def test_open_array_infer_separator_from_store(zarr_version, dimension_separator): - if zarr_version == 3: - StoreClass = DirectoryStoreV3 - path = "data" - else: - StoreClass = DirectoryStore - path = None - store = StoreClass("data/array.zarr", dimension_separator=dimension_separator) - - # Note: no dimension_separator kwarg to open_array - # we are testing here that it gets inferred from store - z = open_array(store, path=path, mode="w", shape=100, chunks=10) - z[:] = 42 - assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - - if dimension_separator is None: - assert z._dimension_separator == "/" if zarr_version == 3 else "." 
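For orientation, a compact sketch of the `open_array` persistence modes exercised above; the on-disk path is just an example:

    import numpy as np
    import zarr

    # "w" creates (overwriting), "r"/"r+" require an existing array,
    # "a" creates if missing, and "w-"/"x" fail if the path already exists
    z = zarr.open_array("data/example.zarr", mode="w", shape=(100,), chunks=(10,), dtype="i4")
    z[:] = 42

    z_ro = zarr.open_array("data/example.zarr", mode="r")
    np.testing.assert_array_equal(z_ro[:], np.full(100, 42, dtype="i4"))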
- else: - assert z._dimension_separator == dimension_separator - - -# TODO: N5 support for v3 -@pytest.mark.parametrize("zarr_version", [None, 2]) -def test_open_array_n5(zarr_version): - store = "data/array.zarr" - kwargs = _init_creation_kwargs(zarr_version) - - # for N5 store - store = "data/array.n5" - z = open_array(store, mode="w", shape=100, chunks=10, **kwargs) - z[:] = 42 - assert isinstance(z, Array) - assert isinstance(z.store, N5Store) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - - store = "data/group.n5" - group_kwargs = kwargs.copy() - # if zarr_version == 3: - # group_kwargs['path'] = 'group' - z = open_group(store, mode="w", **group_kwargs) - i = z.create_group("inner") - a = i.zeros("array", shape=100, chunks=10) - a[:] = 42 - - # Edit inner/attributes.json to not include "n5" - with open("data/group.n5/inner/attributes.json", "w") as o: - o.write("{}") - - # Re-open - a = open_group(store, **group_kwargs)["inner"]["array"] - assert isinstance(a, Array) - assert isinstance(z.store, N5Store) - assert (100,) == a.shape - assert (10,) == a.chunks - assert_array_equal(np.full(100, fill_value=42), a[:]) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_open_array_dict_store(zarr_version, at_root): - # dict will become a KVStore - store = dict() - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore - - # mode == 'w' - z = open_array(store, mode="w", shape=100, chunks=10, **kwargs) - z[:] = 42 - assert isinstance(z, Array) - assert isinstance(z.store, expected_store_type) - assert (100,) == z.shape - assert (10,) == z.chunks - assert_array_equal(np.full(100, fill_value=42), z[:]) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_create_in_dict(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore - - for func in [empty, zeros, ones]: - a = func(100, store=dict(), **kwargs) - assert isinstance(a.store, expected_store_type) - - a = full(100, 5, store=dict(), **kwargs) - assert isinstance(a.store, expected_store_type) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_create_writeable_mode(zarr_version, at_root, tmp_path): - # Regression test for https://github.com/zarr-developers/zarr-python/issues/1306 - import fsspec - - kwargs = _init_creation_kwargs(zarr_version, at_root) - store = fsspec.get_mapper(str(tmp_path)) - z = create(100, store=store, **kwargs) - assert z.store.map == store - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_empty_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - # zarr array - z = empty(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) - # zarr_version will be inferred from z, but have to specify a path in v3 - z2 = empty_like(z, path=kwargs.get("path")) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert z.compressor.get_config() == z2.compressor.get_config() - assert z.fill_value == z2.fill_value - 
assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version - - # numpy array - a = np.empty(100, dtype="f4") - z3 = empty_like(a, **kwargs) - assert a.shape == z3.shape - assert (100,) == z3.chunks - assert a.dtype == z3.dtype - assert z3.fill_value is None - assert z3._store._store_version == expected_zarr_version - - # something slightly silly - a = [0] * 100 - z3 = empty_like(a, shape=200, **kwargs) - assert (200,) == z3.shape - - # other array-likes - b = np.arange(1000).reshape(100, 10) - c = MockBcolzArray(b, 10) - z = empty_like(c, **kwargs) - assert b.shape == z.shape - assert (10, 10) == z.chunks - c = MockH5pyDataset(b, chunks=(10, 2)) - z = empty_like(c, **kwargs) - assert b.shape == z.shape - assert (10, 2) == z.chunks - c = MockH5pyDataset(b, chunks=None) - z = empty_like(c, **kwargs) - assert b.shape == z.shape - assert isinstance(z.chunks, tuple) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - # zarr array - z = zeros(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) - z2 = zeros_like(z, path=kwargs.get("path")) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert z.compressor.get_config() == z2.compressor.get_config() - assert z.fill_value == z2.fill_value - assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version - # numpy array - a = np.empty(100, dtype="f4") - z3 = zeros_like(a, chunks=10, **kwargs) - assert a.shape == z3.shape - assert (10,) == z3.chunks - assert a.dtype == z3.dtype - assert 0 == z3.fill_value - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - # zarr array - z = ones(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) - z2 = ones_like(z, path=kwargs.get("path")) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert z.compressor.get_config() == z2.compressor.get_config() - assert z.fill_value == z2.fill_value - assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version - # numpy array - a = np.empty(100, dtype="f4") - z3 = ones_like(a, chunks=10, **kwargs) - assert a.shape == z3.shape - assert (10,) == z3.chunks - assert a.dtype == z3.dtype - assert 1 == z3.fill_value - assert z3._store._store_version == expected_zarr_version - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - z = full(100, chunks=10, dtype="f4", compressor=Zlib(5), fill_value=42, order="F", **kwargs) - z2 = full_like(z, path=kwargs.get("path")) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert z.compressor.get_config() == z2.compressor.get_config() - assert z.fill_value == z2.fill_value - assert z.order == z2.order 
- assert z._store._store_version == z2._store._store_version == expected_zarr_version - # numpy array - a = np.empty(100, dtype="f4") - z3 = full_like(a, chunks=10, fill_value=42, **kwargs) - assert a.shape == z3.shape - assert (10,) == z3.chunks - assert a.dtype == z3.dtype - assert 42 == z3.fill_value - assert z3._store._store_version == expected_zarr_version - with pytest.raises(TypeError): - # fill_value missing - full_like(a, chunks=10, **kwargs) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_open_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - # zarr array - path = mktemp() - atexit.register(shutil.rmtree, path) - z = full(100, chunks=10, dtype="f4", compressor=Zlib(5), fill_value=42, order="F", **kwargs) - z2 = open_like(z, path) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - assert z.compressor.get_config() == z2.compressor.get_config() - assert z.fill_value == z2.fill_value - assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version - # numpy array - path = mktemp() - atexit.register(shutil.rmtree, path) - a = np.empty(100, dtype="f4") - z3 = open_like(a, path, chunks=10, zarr_version=zarr_version) - assert a.shape == z3.shape - assert (10,) == z3.chunks - assert a.dtype == z3.dtype - assert 0 == z3.fill_value - assert z3._store._store_version == expected_zarr_version - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_create(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - - # defaults - z = create(100, **kwargs) - assert isinstance(z, Array) - assert (100,) == z.shape - assert (100,) == z.chunks # auto-chunks - assert np.dtype(None) == z.dtype - assert "blosc" == z.compressor.codec_id - assert 0 == z.fill_value - assert z._store._store_version == expected_zarr_version - - # all specified - z = create(100, chunks=10, dtype="i4", compressor=Zlib(1), fill_value=42, order="F", **kwargs) - assert isinstance(z, Array) - assert (100,) == z.shape - assert (10,) == z.chunks - assert np.dtype("i4") == z.dtype - assert "zlib" == z.compressor.codec_id - assert 1 == z.compressor.level - assert 42 == z.fill_value - assert "F" == z.order - assert z._store._store_version == expected_zarr_version - - # with synchronizer - synchronizer = ThreadSynchronizer() - z = create(100, chunks=10, synchronizer=synchronizer, **kwargs) - assert isinstance(z, Array) - assert (100,) == z.shape - assert (10,) == z.chunks - assert synchronizer is z.synchronizer - assert z._store._store_version == expected_zarr_version - - # don't allow string as compressor arg - with pytest.raises(ValueError): - create(100, chunks=10, compressor="zlib", **kwargs) - - # h5py compatibility - - z = create(100, compression="zlib", compression_opts=9, **kwargs) - assert "zlib" == z.compressor.codec_id - assert 9 == z.compressor.level - - z = create(100, compression="default", **kwargs) - assert "blosc" == z.compressor.codec_id - - # errors - with pytest.raises(ValueError): - # bad compression argument - create(100, compression=1, **kwargs) - with pytest.raises(ValueError): - # bad fill value - create(100, dtype="i4", fill_value="foo", **kwargs) - - # 
auto chunks - z = create(1000000000, chunks=True, **kwargs) - assert z.chunks[0] < z.shape[0] - z = create(1000000000, chunks=None, **kwargs) # backwards-compatibility - assert z.chunks[0] < z.shape[0] - # no chunks - z = create(1000000000, chunks=False, **kwargs) - assert z.chunks == z.shape - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_compression_args(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) - - with warnings.catch_warnings(): - warnings.simplefilter("default") - z = create(100, compression="zlib", compression_opts=9, **kwargs) - assert isinstance(z, Array) - assert "zlib" == z.compressor.codec_id - assert 9 == z.compressor.level - - # 'compressor' overrides 'compression' - with pytest.warns(UserWarning): - z = create(100, compressor=Zlib(9), compression="bz2", compression_opts=1, **kwargs) - assert isinstance(z, Array) - assert "zlib" == z.compressor.codec_id - assert 9 == z.compressor.level - - # 'compressor' ignores 'compression_opts' - with pytest.warns(UserWarning): - z = create(100, compressor=Zlib(9), compression_opts=1, **kwargs) - assert isinstance(z, Array) - assert "zlib" == z.compressor.codec_id - assert 9 == z.compressor.level - - with pytest.warns(UserWarning): - # 'compressor' overrides 'compression' - create(100, compressor=Zlib(9), compression="bz2", compression_opts=1, **kwargs) - with pytest.warns(UserWarning): - # 'compressor' ignores 'compression_opts' - create(100, compressor=Zlib(9), compression_opts=1, **kwargs) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_create_read_only(zarr_version, at_root): - # https://github.com/alimanfoo/zarr/issues/151 - - kwargs = _init_creation_kwargs(zarr_version, at_root) - - # create an array initially read-only, then enable writing - z = create(100, read_only=True, **kwargs) - assert z.read_only - with pytest.raises(PermissionError): - z[:] = 42 - z.read_only = False - z[:] = 42 - assert np.all(z[...] == 42) - z.read_only = True - with pytest.raises(PermissionError): - z[:] = 0 - - # this is subtly different, but here we want to create an array with data, and then - # have it be read-only - a = np.arange(100) - z = array(a, read_only=True, **kwargs) - assert_array_equal(a, z[...]) - assert z.read_only - with pytest.raises(PermissionError): - z[:] = 42 - - -def test_json_dumps_chunks_numpy_dtype(): - z = zeros((10,), chunks=(np.int64(2),)) - assert np.all(z[...] 
== 0) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.parametrize("at_root", [False, True]) -def test_create_with_storage_transformers(at_root): - kwargs = _init_creation_kwargs(zarr_version=3, at_root=at_root) - transformer = DummyStorageTransfomer( - "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT - ) - z = create(1000000000, chunks=True, storage_transformers=[transformer], **kwargs) - assert isinstance(z.chunk_store, DummyStorageTransfomer) - assert z.chunk_store.test_value == DummyStorageTransfomer.TEST_CONSTANT - - -@pytest.mark.parametrize( - ("init_shape", "init_chunks", "shape", "chunks"), - ( - ((1,), (1,), (1,), (1,)), - ((1.0,), (1.0,), (1,), (1,)), - ((1.0,), False, (1,), (1,)), - ((1.0,), True, (1,), (1,)), - ((1.0,), None, (1,), (1,)), - ), -) -def test_shape_chunk_ints(init_shape, init_chunks, shape, chunks): - g = open_group() - array = g.create_dataset("ds", shape=init_shape, chunks=init_chunks, dtype=np.uint8) - - assert all( - isinstance(s, int) for s in array.shape - ), f"Expected shape to be all ints but found {array.shape=}." - assert all( - isinstance(c, int) for c in array.chunks - ), f"Expected chunks to be all ints but found {array.chunks=}." - assert array.shape == shape, f"Expected {shape=} but found {array.shape=}." - assert array.chunks == chunks, f"Expected {chunks=} but found {array.chunks=}." diff --git a/zarr/tests/test_dim_separator.py b/zarr/tests/test_dim_separator.py deleted file mode 100644 index 0a5814e65f..0000000000 --- a/zarr/tests/test_dim_separator.py +++ /dev/null @@ -1,135 +0,0 @@ -import pathlib - -import pytest -from numpy.testing import assert_array_equal -from functools import partial - -import zarr -from zarr.core import Array -from zarr.storage import DirectoryStore, NestedDirectoryStore, FSStore -from zarr.tests.util import have_fsspec - - -needs_fsspec = pytest.mark.skipif(not have_fsspec, reason="needs fsspec") - - -@pytest.fixture( - params=( - "static_flat", - "static_flat_legacy", - "static_nested", - "static_nested_legacy", - "directory_nested", - "directory_flat", - "directory_default", - "nesteddirectory_nested", - "nesteddirectory_default", - pytest.param("fs_nested", marks=needs_fsspec), - pytest.param("fs_flat", marks=needs_fsspec), - pytest.param("fs_default", marks=needs_fsspec), - ) -) -def dataset(tmpdir, request): - """ - Generate a variety of different Zarrs using - different store implementations as well as - different dimension_separator arguments. 
- """ - - loc = tmpdir.join("dim_sep_test.zarr") - which = request.param - kwargs = {} - - if which.startswith("static"): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent - suffix = which[len("static_") :] - static = project_root / "fixture" / suffix - - if not static.exists(): # pragma: no cover - if "nested" in which: - # No way to reproduce the nested_legacy file via code - generator = NestedDirectoryStore - else: - if "legacy" in suffix: - # No dimension_separator metadata included - generator = DirectoryStore - else: - # Explicit dimension_separator metadata included - generator = partial(DirectoryStore, dimension_separator=".") - - # store the data - should be one-time operation - s = generator(str(static)) - a = zarr.open(store=s, mode="w", shape=(2, 2), dtype=" 2 and g1.store.is_erasable(): - arr_path = g1.path + "/arr1" - sfx = _get_metadata_suffix(g1.store) - array_meta_file = meta_root + arr_path + ".array" + sfx - assert array_meta_file in g1.store - group_meta_file = meta_root + g2.path + ".group" + sfx - assert group_meta_file in g1.store - - # rmdir on the array path should also remove the metadata file - g1.store.rmdir(arr_path) - assert array_meta_file not in g1.store - # rmdir on the group path should also remove its metadata file - g1.store.rmdir(g2.path) - assert group_meta_file not in g1.store - - def _dataset_path(self, group, path): - path = path.rstrip("/") - absolute = path.startswith("/") - if absolute: - dataset_path = path - else: - dataset_path = "/".join([group.path, path]) - dataset_path = dataset_path.lstrip("/") - dataset_name = "/" + dataset_path - return dataset_path, dataset_name - - def test_create_dataset(self): - g = self.create_group() - - # create as immediate child - dpath = "foo" - d1 = g.create_dataset(dpath, shape=1000, chunks=100) - path, name = self._dataset_path(g, dpath) - assert isinstance(d1, Array) - assert (1000,) == d1.shape - assert (100,) == d1.chunks - assert path == d1.path - assert name == d1.name - assert g.store is d1.store - - # create as descendant - dpath = "/a/b/c/" - d2 = g.create_dataset( - dpath, - shape=2000, - chunks=200, - dtype="i1", - compression="zlib", - compression_opts=9, - fill_value=42, - order="F", - ) - path, name = self._dataset_path(g, dpath) - assert isinstance(d2, Array) - assert (2000,) == d2.shape - assert (200,) == d2.chunks - assert np.dtype("i1") == d2.dtype - assert "zlib" == d2.compressor.codec_id - assert 9 == d2.compressor.level - assert 42 == d2.fill_value - assert "F" == d2.order - assert path == d2.path - assert name == d2.name - assert g.store is d2.store - - # create with data - data = np.arange(3000, dtype="u2") - dpath = "bar" - d3 = g.create_dataset(dpath, data=data, chunks=300) - path, name = self._dataset_path(g, dpath) - assert isinstance(d3, Array) - assert (3000,) == d3.shape - assert (300,) == d3.chunks - assert np.dtype("u2") == d3.dtype - assert_array_equal(data, d3[:]) - assert path == d3.path - assert name == d3.name - assert g.store is d3.store - - # compression arguments handling follows... 
- - # compression_opts as dict - d = g.create_dataset( - "aaa", - shape=1000, - dtype="u1", - compression="blosc", - compression_opts=dict(cname="zstd", clevel=1, shuffle=2), - ) - assert d.compressor.codec_id == "blosc" - assert "zstd" == d.compressor.cname - assert 1 == d.compressor.clevel - assert 2 == d.compressor.shuffle - - # compression_opts as sequence - d = g.create_dataset( - "bbb", shape=1000, dtype="u1", compression="blosc", compression_opts=("zstd", 1, 2) - ) - assert d.compressor.codec_id == "blosc" - assert "zstd" == d.compressor.cname - assert 1 == d.compressor.clevel - assert 2 == d.compressor.shuffle - - # None compression_opts - d = g.create_dataset("ccc", shape=1000, dtype="u1", compression="zlib") - assert d.compressor.codec_id == "zlib" - assert 1 == d.compressor.level - - # None compression - d = g.create_dataset("ddd", shape=1000, dtype="u1", compression=None) - assert d.compressor is None - - # compressor as compression - d = g.create_dataset("eee", shape=1000, dtype="u1", compression=Zlib(1)) - assert d.compressor.codec_id == "zlib" - assert 1 == d.compressor.level - - g.store.close() - - def test_require_dataset(self): - g = self.create_group() - - # create - dpath = "foo" - d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") - d1[:] = np.arange(1000) - path, name = self._dataset_path(g, dpath) - assert isinstance(d1, Array) - assert (1000,) == d1.shape - assert (100,) == d1.chunks - assert np.dtype("f4") == d1.dtype - assert path == d1.path - assert name == d1.name - assert g.store is d1.store - assert_array_equal(np.arange(1000), d1[:]) - - # require - d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") - assert isinstance(d2, Array) - assert (1000,) == d2.shape - assert (100,) == d2.chunks - assert np.dtype("f4") == d2.dtype - assert path == d2.path - assert name == d2.name - assert g.store is d2.store - assert_array_equal(np.arange(1000), d2[:]) - assert d1 == d2 - - # bad shape - use TypeError for h5py compatibility - with pytest.raises(TypeError): - g.require_dataset("foo", shape=2000, chunks=100, dtype="f4") - - # dtype matching - # can cast - d3 = g.require_dataset("foo", shape=1000, chunks=100, dtype="i2") - assert np.dtype("f4") == d3.dtype - assert d1 == d3 - with pytest.raises(TypeError): - # cannot cast - g.require_dataset("foo", shape=1000, chunks=100, dtype="i4") - with pytest.raises(TypeError): - # can cast but not exact match - g.require_dataset("foo", shape=1000, chunks=100, dtype="i2", exact=True) - - g.store.close() - - def test_create_errors(self): - g = self.create_group() - - # array obstructs group, array - g.create_dataset("foo", shape=100, chunks=10) - with pytest.raises(ValueError): - g.create_group("foo/bar") - with pytest.raises(ValueError): - g.require_group("foo/bar") - with pytest.raises(ValueError): - g.create_dataset("foo/bar", shape=100, chunks=10) - with pytest.raises(ValueError): - g.require_dataset("foo/bar", shape=100, chunks=10) - - # array obstructs group, array - g.create_dataset("a/b", shape=100, chunks=10) - with pytest.raises(ValueError): - g.create_group("a/b") - with pytest.raises(ValueError): - g.require_group("a/b") - with pytest.raises(ValueError): - g.create_dataset("a/b", shape=100, chunks=10) - - # group obstructs array - g.create_group("c/d") - with pytest.raises(ValueError): - g.create_dataset("c", shape=100, chunks=10) - with pytest.raises(ValueError): - g.require_dataset("c", shape=100, chunks=10) - with pytest.raises(ValueError): - g.create_dataset("c/d", shape=100, chunks=10) - 
with pytest.raises(ValueError): - g.require_dataset("c/d", shape=100, chunks=10) - - # h5py compatibility, accept 'fillvalue' - d = g.create_dataset("x", shape=100, chunks=10, fillvalue=42) - assert 42 == d.fill_value - - # h5py compatibility, ignore 'shuffle' - with pytest.warns(UserWarning, match="ignoring keyword argument 'shuffle'"): - g.create_dataset("y", shape=100, chunks=10, shuffle=True) - - # read-only - g = self.create_group(read_only=True) - with pytest.raises(PermissionError): - g.create_group("zzz") - with pytest.raises(PermissionError): - g.require_group("zzz") - with pytest.raises(PermissionError): - g.create_dataset("zzz", shape=100, chunks=10) - with pytest.raises(PermissionError): - g.require_dataset("zzz", shape=100, chunks=10) - - g.store.close() - - def test_create_overwrite(self): - try: - for method_name in "create_dataset", "create", "empty", "zeros", "ones": - g = self.create_group() - getattr(g, method_name)("foo", shape=100, chunks=10) - - # overwrite array with array - d = getattr(g, method_name)("foo", shape=200, chunks=20, overwrite=True) - assert (200,) == d.shape - # overwrite array with group - g2 = g.create_group("foo", overwrite=True) - assert 0 == len(g2) - # overwrite group with array - d = getattr(g, method_name)("foo", shape=300, chunks=30, overwrite=True) - assert (300,) == d.shape - # overwrite array with group - d = getattr(g, method_name)("foo/bar", shape=400, chunks=40, overwrite=True) - assert (400,) == d.shape - assert isinstance(g["foo"], Group) - - g.store.close() - except NotImplementedError: - pass - - def test_getitem_contains_iterators(self): - # setup - g1 = self.create_group() - g2 = g1.create_group("foo/bar") - if g1._version == 2: - d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) - else: - # v3: cannot create a dataset at the root by starting with / - # instead, need to create the dataset on g1 directly - d1 = g1.create_dataset("a/b/c", shape=1000, chunks=100) - d1[:] = np.arange(1000) - d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) - d2[:] = np.arange(3000) - - # test __getitem__ - assert isinstance(g1["foo"], Group) - assert isinstance(g1["foo"]["bar"], Group) - assert isinstance(g1["foo/bar"], Group) - if g1._version == 2: - assert isinstance(g1["/foo/bar/"], Group) - else: - # start or end with / raises KeyError - # TODO: should we allow stripping of these on v3? 
- with pytest.raises(KeyError): - assert isinstance(g1["/foo/bar/"], Group) - assert isinstance(g1["foo/baz"], Array) - assert g2 == g1["foo/bar"] - assert g1["foo"]["bar"] == g1["foo/bar"] - assert d2 == g1["foo/baz"] - assert_array_equal(d2[:], g1["foo/baz"]) - assert isinstance(g1["a"], Group) - assert isinstance(g1["a"]["b"], Group) - assert isinstance(g1["a/b"], Group) - assert isinstance(g1["a"]["b"]["c"], Array) - assert isinstance(g1["a/b/c"], Array) - assert d1 == g1["a/b/c"] - assert g1["a"]["b"]["c"] == g1["a/b/c"] - assert_array_equal(d1[:], g1["a/b/c"][:]) - - # test __contains__ - assert "foo" in g1 - assert "foo/bar" in g1 - assert "foo/baz" in g1 - assert "bar" in g1["foo"] - assert "a" in g1 - assert "a/b" in g1 - assert "a/b/c" in g1 - assert "baz" not in g1 - assert "a/b/c/d" not in g1 - assert "a/z" not in g1 - assert "quux" not in g1["foo"] - - # test key errors - with pytest.raises(KeyError): - g1["baz"] - with pytest.raises(KeyError): - g1["x/y/z"] - - # test __len__ - assert 2 == len(g1) - assert 2 == len(g1["foo"]) - assert 0 == len(g1["foo/bar"]) - assert 1 == len(g1["a"]) - assert 1 == len(g1["a/b"]) - - # test __iter__, keys() - - if g1._version == 2: - # currently assumes sorted by key - assert ["a", "foo"] == list(g1) - assert ["a", "foo"] == list(g1.keys()) - assert ["bar", "baz"] == list(g1["foo"]) - assert ["bar", "baz"] == list(g1["foo"].keys()) - else: - # v3 is not necessarily sorted by key - assert ["a", "foo"] == sorted(list(g1)) - assert ["a", "foo"] == sorted(list(g1.keys())) - assert ["bar", "baz"] == sorted(list(g1["foo"])) - assert ["bar", "baz"] == sorted(list(g1["foo"].keys())) - assert [] == sorted(g1["foo/bar"]) - assert [] == sorted(g1["foo/bar"].keys()) - - # test items(), values() - # currently assumes sorted by key - - items = list(g1.items()) - values = list(g1.values()) - if g1._version == 3: - # v3 are not automatically sorted by key - items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert "a" == items[0][0] - assert g1["a"] == items[0][1] - assert g1["a"] == values[0] - assert "foo" == items[1][0] - assert g1["foo"] == items[1][1] - assert g1["foo"] == values[1] - - items = list(g1["foo"].items()) - values = list(g1["foo"].values()) - if g1._version == 3: - # v3 are not automatically sorted by key - items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert "bar" == items[0][0] - assert g1["foo"]["bar"] == items[0][1] - assert g1["foo"]["bar"] == values[0] - assert "baz" == items[1][0] - assert g1["foo"]["baz"] == items[1][1] - assert g1["foo"]["baz"] == values[1] - - # test array_keys(), arrays(), group_keys(), groups() - - groups = list(g1.groups()) - arrays = list(g1.arrays()) - if g1._version == 2: - # currently assumes sorted by key - assert ["a", "foo"] == list(g1.group_keys()) - else: - assert ["a", "foo"] == sorted(list(g1.group_keys())) - groups = sorted(groups) - arrays = sorted(arrays) - assert "a" == groups[0][0] - assert g1["a"] == groups[0][1] - assert "foo" == groups[1][0] - assert g1["foo"] == groups[1][1] - assert [] == list(g1.array_keys()) - assert [] == arrays - - assert ["bar"] == list(g1["foo"].group_keys()) - assert ["baz"] == list(g1["foo"].array_keys()) - groups = list(g1["foo"].groups()) - arrays = list(g1["foo"].arrays()) - if g1._version == 3: - groups = sorted(groups) - arrays = sorted(arrays) - assert "bar" == groups[0][0] - assert g1["foo"]["bar"] == groups[0][1] - assert "baz" == arrays[0][0] - assert g1["foo"]["baz"] == arrays[0][1] - - # visitor collection 
tests - items = [] - - def visitor2(obj): - items.append(obj.path) - - # noinspection PyUnusedLocal - def visitor3(name, obj=None): - items.append(name) - - def visitor4(name, obj): - items.append((name, obj)) - - del items[:] - g1.visitvalues(visitor2) - expected_items = [ - "a", - "a/b", - "a/b/c", - "foo", - "foo/bar", - "foo/baz", - ] - if g1._version == 3: - expected_items = [g1.path + "/" + i for i in expected_items] - assert expected_items == items - - del items[:] - g1["foo"].visitvalues(visitor2) - expected_items = [ - "foo/bar", - "foo/baz", - ] - if g1._version == 3: - expected_items = [g1.path + "/" + i for i in expected_items] - assert expected_items == items - - del items[:] - g1.visit(visitor3) - assert [ - "a", - "a/b", - "a/b/c", - "foo", - "foo/bar", - "foo/baz", - ] == items - - del items[:] - g1["foo"].visit(visitor3) - assert [ - "bar", - "baz", - ] == items - - del items[:] - g1.visitkeys(visitor3) - assert [ - "a", - "a/b", - "a/b/c", - "foo", - "foo/bar", - "foo/baz", - ] == items - - del items[:] - g1["foo"].visitkeys(visitor3) - assert [ - "bar", - "baz", - ] == items - - del items[:] - g1.visititems(visitor3) - assert [ - "a", - "a/b", - "a/b/c", - "foo", - "foo/bar", - "foo/baz", - ] == items - - del items[:] - g1["foo"].visititems(visitor3) - assert [ - "bar", - "baz", - ] == items - - del items[:] - g1.visititems(visitor4) - for n, o in items: - assert g1[n] == o - - del items[:] - g1["foo"].visititems(visitor4) - for n, o in items: - assert g1["foo"][n] == o - - # visitor filter tests - # noinspection PyUnusedLocal - def visitor0(val, *args): - name = getattr(val, "path", val) - if name == "a/b/c/d": - return True # pragma: no cover - - # noinspection PyUnusedLocal - def visitor1(val, *args): - name = getattr(val, "path", val) - if name.startswith("group/"): - # strip the group path for v3 - name = name[6:] - if name == "a/b/c": - return True - - assert g1.visit(visitor0) is None - assert g1.visitkeys(visitor0) is None - assert g1.visitvalues(visitor0) is None - assert g1.visititems(visitor0) is None - assert g1.visit(visitor1) is True - assert g1.visitkeys(visitor1) is True - assert g1.visitvalues(visitor1) is True - assert g1.visititems(visitor1) is True - - g1.store.close() - - # regression test for https://github.com/zarr-developers/zarr-python/issues/1228 - def test_double_counting_group_v3(self): - root_group = self.create_group() - group_names = ["foo", "foo-", "foo_"] - for name in group_names: - sub_group = root_group.create_group(name) - sub_group.create("bar", shape=10, dtype="i4") - assert list(root_group.group_keys()) == sorted(group_names) - assert list(root_group.groups()) == [ - (name, root_group[name]) for name in sorted(group_names) - ] - - def test_empty_getitem_contains_iterators(self): - # setup - g = self.create_group() - - # test - assert [] == list(g) - assert [] == list(g.keys()) - assert 0 == len(g) - assert "foo" not in g - - g.store.close() - - def test_iterators_recurse(self): - # setup - g1 = self.create_group() - g2 = g1.create_group("foo/bar") - d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) - d1[:] = np.arange(1000) - d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) - d2[:] = np.arange(3000) - d3 = g2.create_dataset("zab", shape=2000, chunks=200) - d3[:] = np.arange(2000) - - # test recursive array_keys - array_keys = list(g1["foo"].array_keys(recurse=False)) - array_keys_recurse = list(g1["foo"].array_keys(recurse=True)) - assert len(array_keys_recurse) > len(array_keys) - assert sorted(array_keys_recurse) 
== ["baz", "zab"] - - # test recursive arrays - arrays = list(g1["foo"].arrays(recurse=False)) - arrays_recurse = list(g1["foo"].arrays(recurse=True)) - assert len(arrays_recurse) > len(arrays) - assert "zab" == arrays_recurse[0][0] - assert g1["foo"]["bar"]["zab"] == arrays_recurse[0][1] - - g1.store.close() - - def test_getattr(self): - # setup - g1 = self.create_group() - g2 = g1.create_group("foo") - g2.create_dataset("bar", shape=100) - - # test - assert g1["foo"] == g1.foo - assert g2["bar"] == g2.bar - # test that hasattr returns False instead of an exception (issue #88) - assert not hasattr(g1, "unexistingattribute") - - g1.store.close() - - def test_setitem(self): - g = self.create_group() - try: - data = np.arange(100) - g["foo"] = data - assert_array_equal(data, g["foo"]) - data = np.arange(200) - g["foo"] = data - assert_array_equal(data, g["foo"]) - # 0d array - g["foo"] = 42 - assert () == g["foo"].shape - assert 42 == g["foo"][()] - except NotImplementedError: - pass - g.store.close() - - def test_delitem(self): - g = self.create_group() - g.create_group("foo") - g.create_dataset("bar/baz", shape=100, chunks=10) - assert "foo" in g - assert "bar" in g - assert "bar/baz" in g - try: - del g["bar"] - with pytest.raises(KeyError): - del g["xxx"] - except NotImplementedError: - pass - else: - assert "foo" in g - assert "bar" not in g - assert "bar/baz" not in g - g.store.close() - - def test_move(self): - g = self.create_group() - - data = np.arange(100) - g["boo"] = data - - data = np.arange(100) - g["foo"] = data - - g.move("foo", "bar") - assert "foo" not in g - assert "bar" in g - assert_array_equal(data, g["bar"]) - - g.move("bar", "foo/bar") - assert "bar" not in g - assert "foo" in g - assert "foo/bar" in g - assert isinstance(g["foo"], Group) - assert_array_equal(data, g["foo/bar"]) - - g.move("foo", "foo2") - assert "foo" not in g - assert "foo/bar" not in g - assert "foo2" in g - assert "foo2/bar" in g - assert isinstance(g["foo2"], Group) - assert_array_equal(data, g["foo2/bar"]) - - g2 = g["foo2"] - g2.move("bar", "/bar") - assert "foo2" in g - assert "foo2/bar" not in g - if g2._version == 2: - assert "bar" in g - else: - # The `g2.move` call above moved bar to meta/root/bar and - # meta/data/bar. This is outside the `g` group located at - # /meta/root/group, so bar is no longer within `g`. - assert "bar" not in g - assert "meta/root/bar.array.json" in g._store - if g._chunk_store: - assert "data/root/bar/c0" in g._chunk_store - else: - assert "data/root/bar/c0" in g._store - assert isinstance(g["foo2"], Group) - if g2._version == 2: - assert_array_equal(data, g["bar"]) - else: - # TODO: How to access element created outside of group.path in v3? - # One option is to make a Hierarchy class representing the - # root. Currently Group requires specification of `path`, - # but the path of the root would be just '' which is not - # currently allowed. 
- pass - - with pytest.raises(ValueError): - g2.move("bar", "bar2") - - with pytest.raises(ValueError): - g.move("bar", "boo") - - g.store.close() - - def test_array_creation(self): - grp = self.create_group() - - a = grp.create("a", shape=100, chunks=10) - assert isinstance(a, Array) - b = grp.empty("b", shape=100, chunks=10) - assert isinstance(b, Array) - assert b.fill_value is None - c = grp.zeros("c", shape=100, chunks=10) - assert isinstance(c, Array) - assert 0 == c.fill_value - d = grp.ones("d", shape=100, chunks=10) - assert isinstance(d, Array) - assert 1 == d.fill_value - e = grp.full("e", shape=100, chunks=10, fill_value=42) - assert isinstance(e, Array) - assert 42 == e.fill_value - - f = grp.empty_like("f", a) - assert isinstance(f, Array) - assert f.fill_value is None - g = grp.zeros_like("g", a) - assert isinstance(g, Array) - assert 0 == g.fill_value - h = grp.ones_like("h", a) - assert isinstance(h, Array) - assert 1 == h.fill_value - i = grp.full_like("i", e) - assert isinstance(i, Array) - assert 42 == i.fill_value - - j = grp.array("j", data=np.arange(100), chunks=10) - assert isinstance(j, Array) - assert_array_equal(np.arange(100), j[:]) - - grp.store.close() - - grp = self.create_group(read_only=True) - with pytest.raises(PermissionError): - grp.create("aa", shape=100, chunks=10) - with pytest.raises(PermissionError): - grp.empty("aa", shape=100, chunks=10) - with pytest.raises(PermissionError): - grp.zeros("aa", shape=100, chunks=10) - with pytest.raises(PermissionError): - grp.ones("aa", shape=100, chunks=10) - with pytest.raises(PermissionError): - grp.full("aa", shape=100, chunks=10, fill_value=42) - with pytest.raises(PermissionError): - grp.array("aa", data=np.arange(100), chunks=10) - with pytest.raises(PermissionError): - grp.create("aa", shape=100, chunks=10) - with pytest.raises(PermissionError): - grp.empty_like("aa", a) - with pytest.raises(PermissionError): - grp.zeros_like("aa", a) - with pytest.raises(PermissionError): - grp.ones_like("aa", a) - with pytest.raises(PermissionError): - grp.full_like("aa", a) - - grp.store.close() - - def test_paths(self): - g1 = self.create_group() - g2 = g1.create_group("foo/bar") - - if g1._version == 2: - assert g1 == g1["/"] - assert g1 == g1["//"] - assert g1 == g1["///"] - assert g1 == g2["/"] - assert g1 == g2["//"] - assert g1 == g2["///"] - assert g2 == g1["foo/bar"] - assert g2 == g1["/foo/bar"] - assert g2 == g1["foo/bar/"] - assert g2 == g1["//foo/bar"] - assert g2 == g1["//foo//bar//"] - assert g2 == g1["///foo///bar///"] - assert g2 == g2["/foo/bar"] - else: - # the expected key format gives a match - assert g2 == g1["foo/bar"] - - # TODO: Should presence of a trailing slash raise KeyError? - # The spec says "the final character is not a / character" - # but we currently strip trailing '/' as done for v2. 
- assert g2 == g1["foo/bar/"] - - # double slash also currently works (spec doesn't mention this - # case, but have kept it for v2 behavior compatibility) - assert g2 == g1["foo//bar"] - - # TODO, root: fix these cases - # v3: leading / implies we are at the root, not within a group, - # so these all raise KeyError - for path in ["/foo/bar", "//foo/bar", "//foo//bar//", "///fooo///bar///"]: - with pytest.raises(KeyError): - g1[path] - - with pytest.raises(ValueError): - g1["."] - with pytest.raises(ValueError): - g1[".."] - with pytest.raises(ValueError): - g1["foo/."] - with pytest.raises(ValueError): - g1["foo/.."] - with pytest.raises(ValueError): - g1["foo/./bar"] - with pytest.raises(ValueError): - g1["foo/../bar"] - - g1.store.close() - - def test_pickle(self): - # setup group - g = self.create_group() - d = g.create_dataset("foo/bar", shape=100, chunks=10) - d[:] = np.arange(100) - path = g.path - name = g.name - n = len(g) - keys = list(g) - - # round-trip through pickle - dump = pickle.dumps(g) - # some stores cannot be opened twice at the same time, need to close - # store before can round-trip through pickle - g.store.close() - g2 = pickle.loads(dump) - - # verify - assert path == g2.path - assert name == g2.name - assert n == len(g2) - assert keys == list(g2) - assert isinstance(g2["foo"], Group) - assert isinstance(g2["foo/bar"], Array) - - g2.store.close() - - def test_context_manager(self): - with self.create_group() as g: - d = g.create_dataset("foo/bar", shape=100, chunks=10) - d[:] = np.arange(100) - - -@pytest.mark.parametrize("chunk_dict", [False, True]) -def test_group_init_from_dict(chunk_dict): - if chunk_dict: - store, chunk_store = dict(), dict() - else: - store, chunk_store = dict(), None - init_group(store, path=None, chunk_store=chunk_store) - g = Group(store, path=None, read_only=False, chunk_store=chunk_store) - assert store is not g.store - assert isinstance(g.store, KVStore) - if chunk_store is None: - assert g.store is g.chunk_store - else: - assert chunk_store is not g.chunk_store - - -# noinspection PyStatementEffect -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3(TestGroup, unittest.TestCase): - @staticmethod - def create_store(): - # can be overridden in sub-classes - return KVStoreV3(dict()), None - - def create_group( - self, store=None, path="group", read_only=False, chunk_store=None, synchronizer=None - ): - # can be overridden in sub-classes - if store is None: - store, chunk_store = self.create_store() - init_group(store, path=path, chunk_store=chunk_store) - g = Group( - store, - path=path, - read_only=read_only, - chunk_store=chunk_store, - synchronizer=synchronizer, - ) - return g - - def test_group_init_1(self): - store, chunk_store = self.create_store() - g = self.create_group(store, chunk_store=chunk_store) - assert store is g.store - if chunk_store is None: - assert store is g.chunk_store - else: - assert chunk_store is g.chunk_store - assert not g.read_only - # different path/name in v3 case - assert "group" == g.path - assert "/group" == g.name - assert "group" == g.basename - - assert isinstance(g.attrs, Attributes) - g.attrs["foo"] = "bar" - assert g.attrs["foo"] == "bar" - - assert isinstance(g.info, InfoReporter) - assert isinstance(repr(g.info), str) - assert isinstance(g.info._repr_html_(), str) - store.close() - - def test_group_init_errors_2(self): - store, chunk_store = self.create_store() - path = "tmp" - init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) - # 
array blocks group - with pytest.raises(ValueError): - Group(store, path=path, chunk_store=chunk_store) - store.close() - - -class TestGroupWithMemoryStore(TestGroup): - @staticmethod - def create_store(): - return MemoryStore(), None - - -# noinspection PyStatementEffect -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithMemoryStore(TestGroupWithMemoryStore, TestGroupV3): - @staticmethod - def create_store(): - return MemoryStoreV3(), None - - -class TestGroupWithDirectoryStore(TestGroup): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = DirectoryStore(path) - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithDirectoryStore(TestGroupWithDirectoryStore, TestGroupV3): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = DirectoryStoreV3(path) - return store, None - - -@skip_test_env_var("ZARR_TEST_ABS") -class TestGroupWithABSStore(TestGroup): - @staticmethod - def create_store(): - container_client = abs_container() - store = ABSStore(client=container_client) - store.rmdir() - return store, None - - @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") - def test_pickle(self): - # internal attribute on ContainerClient isn't serializable for py36 and earlier - super().test_pickle() - - -@skip_test_env_var("ZARR_TEST_ABS") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithABSStore(TestGroupV3): - @staticmethod - def create_store(): - container_client = abs_container() - store = ABSStoreV3(client=container_client) - store.rmdir() - return store, None - - @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") - def test_pickle(self): - # internal attribute on ContainerClient isn't serializable for py36 and earlier - super().test_pickle() - - -class TestGroupWithNestedDirectoryStore(TestGroup): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore(path) - return store, None - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestGroupWithFSStore(TestGroup): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = FSStore(path) - return store, None - - def test_round_trip_nd(self): - data = np.arange(1000).reshape(10, 10, 10) - name = "raw" - - store, _ = self.create_store() - f = open_group(store, mode="w") - f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) - assert name in f - h = open_group(store, mode="r") - np.testing.assert_array_equal(h[name][:], data) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = FSStoreV3(path) - return store, None - - def test_round_trip_nd(self): - data = np.arange(1000).reshape(10, 10, 10) - name = "raw" - - store, _ = self.create_store() - f = open_group(store, path="group", mode="w") - f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) - h = open_group(store, path="group", mode="r") - np.testing.assert_array_equal(h[name][:], data) - - f = open_group(store, 
path="group2", mode="w") - - data_size = data.nbytes - group_meta_size = buffer_size(store[meta_root + "group.group.json"]) - group2_meta_size = buffer_size(store[meta_root + "group2.group.json"]) - array_meta_size = buffer_size(store[meta_root + "group/raw.array.json"]) - assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size - # added case with path to complete coverage - assert store.getsize("group") == data_size + group_meta_size + array_meta_size - assert store.getsize("group2") == group2_meta_size - assert store.getsize("group/raw") == data_size + array_meta_size - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestGroupWithNestedFSStore(TestGroupWithFSStore): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = FSStore(path, key_separator="/", auto_mkdir=True) - return store, None - - def test_inconsistent_dimension_separator(self): - data = np.arange(1000).reshape(10, 10, 10) - name = "raw" - - store, _ = self.create_store() - f = open_group(store, mode="w") - - # cannot specify dimension_separator that conflicts with the store - with pytest.raises(ValueError): - f.create_dataset( - name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." - ) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore): - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = FSStoreV3(path, key_separator="/", auto_mkdir=True) - return store, None - - def test_inconsistent_dimension_separator(self): - data = np.arange(1000).reshape(10, 10, 10) - name = "raw" - - store, _ = self.create_store() - f = open_group(store, path="group", mode="w") - - # cannot specify dimension_separator that conflicts with the store - with pytest.raises(ValueError): - f.create_dataset( - name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." - ) - - -class TestGroupWithZipStore(TestGroup): - @staticmethod - def create_store(): - path = mktemp(suffix=".zip") - atexit.register(os.remove, path) - store = ZipStore(path) - return store, None - - def test_context_manager(self): - with self.create_group() as g: - store = g.store - d = g.create_dataset("foo/bar", shape=100, chunks=10) - d[:] = np.arange(100) - - # Check that exiting the context manager closes the store, - # and therefore the underlying ZipFile. - with pytest.raises(ValueError): - store.zf.extractall() - - def test_move(self): - # zip store is not erasable (can so far only append to a zip - # so we can't test for move. 
- pass - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithZipStore(TestGroupWithZipStore, TestGroupV3): - @staticmethod - def create_store(): - path = mktemp(suffix=".zip") - atexit.register(os.remove, path) - store = ZipStoreV3(path) - return store, None - - -class TestGroupWithDBMStore(TestGroup): - @staticmethod - def create_store(): - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - store = DBMStore(path, flag="n") - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithDBMStore(TestGroupWithDBMStore, TestGroupV3): - @staticmethod - def create_store(): - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - store = DBMStoreV3(path, flag="n") - return store, None - - -class TestGroupWithDBMStoreBerkeleyDB(TestGroup): - @staticmethod - def create_store(): - bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStore(path, flag="n", open=bsddb3.btopen) - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithDBMStoreBerkeleyDB(TestGroupWithDBMStoreBerkeleyDB, TestGroupV3): - @staticmethod - def create_store(): - bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) - return store, None - - -class TestGroupWithLMDBStore(TestGroup): - @staticmethod - def create_store(): - pytest.importorskip("lmdb") - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - store = LMDBStore(path) - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithLMDBStore(TestGroupWithLMDBStore, TestGroupV3): - @staticmethod - def create_store(): - pytest.importorskip("lmdb") - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - store = LMDBStoreV3(path) - return store, None - - -class TestGroupWithSQLiteStore(TestGroup): - def create_store(self): - pytest.importorskip("sqlite3") - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStore(path) - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithSQLiteStore(TestGroupWithSQLiteStore, TestGroupV3): - def create_store(self): - pytest.importorskip("sqlite3") - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStoreV3(path) - return store, None - - -class TestGroupWithChunkStore(TestGroup): - @staticmethod - def create_store(): - return KVStore(dict()), KVStore(dict()) - - def test_chunk_store(self): - # setup - store, chunk_store = self.create_store() - g = self.create_group(store, chunk_store=chunk_store) - - # check attributes - assert store is g.store - assert chunk_store is g.chunk_store - - # create array - a = g.zeros("foo", shape=100, chunks=10) - assert store is a.store - assert chunk_store is a.chunk_store - a[:] = np.arange(100) - assert_array_equal(np.arange(100), a[:]) - - # check store keys - expect = sorted([group_meta_key, "foo/" + array_meta_key]) - actual = sorted(store.keys()) - assert expect == actual - expect = ["foo/" + str(i) for i in range(10)] - actual = sorted(chunk_store.keys()) - assert expect == actual - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithChunkStore(TestGroupWithChunkStore, TestGroupV3): - 
@staticmethod - def create_store(): - return KVStoreV3(dict()), KVStoreV3(dict()) - - def test_chunk_store(self): - # setup - store, chunk_store = self.create_store() - path = "group1" - g = self.create_group(store, path=path, chunk_store=chunk_store) - - # check attributes - assert store is g.store - assert chunk_store is g.chunk_store - - # create array - a = g.zeros("foo", shape=100, chunks=10) - assert store is a.store - assert chunk_store is a.chunk_store - a[:] = np.arange(100) - assert_array_equal(np.arange(100), a[:]) - - # check store keys - group_key = meta_root + path + ".group.json" - array_key = meta_root + path + "/foo" + ".array.json" - expect = sorted([group_key, array_key, "zarr.json"]) - actual = sorted(store.keys()) - assert expect == actual - expect = [data_root + path + "/foo/c" + str(i) for i in range(10)] - expect += ["zarr.json"] - actual = sorted(chunk_store.keys()) - assert expect == actual - - -class TestGroupWithStoreCache(TestGroup): - @staticmethod - def create_store(): - store = LRUStoreCache(dict(), max_size=None) - return store, None - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3WithStoreCache(TestGroupWithStoreCache, TestGroupV3): - @staticmethod - def create_store(): - store = LRUStoreCacheV3(dict(), max_size=None) - return store, None - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_group(zarr_version): - # test the group() convenience function - - # basic usage - if zarr_version == 2: - g = group() - assert "" == g.path - assert "/" == g.name - else: - g = group(path="group1", zarr_version=zarr_version) - assert "group1" == g.path - assert "/group1" == g.name - assert isinstance(g, Group) - - # usage with custom store - if zarr_version == 2: - store = KVStore(dict()) - path = None - else: - store = KVStoreV3(dict()) - path = "foo" - g = group(store=store, path=path) - assert isinstance(g, Group) - assert store is g.store - - # overwrite behaviour - if zarr_version == 2: - store = KVStore(dict()) - path = None - else: - store = KVStoreV3(dict()) - path = "foo" - init_array(store, path=path, shape=100, chunks=10) - with pytest.raises(ValueError): - group(store, path=path) - g = group(store, path=path, overwrite=True) - assert isinstance(g, Group) - assert store is g.store - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_group_writeable_mode(zarr_version, tmp_path): - # Regression test for https://github.com/zarr-developers/zarr-python/issues/1353 - import fsspec - - store = fsspec.get_mapper(str(tmp_path)) - zg = group(store=store) - assert zg.store.map == store - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_group(zarr_version): - # test the open_group() convenience function - - store = "data/group.zarr" - - expected_store_type = DirectoryStore if zarr_version == 2 else DirectoryStoreV3 - - # mode == 'w' - path = None if zarr_version == 2 else "group1" - g = open_group(store, path=path, mode="w", zarr_version=zarr_version) - assert isinstance(g, Group) - assert isinstance(g.store, expected_store_type) - assert 0 == len(g) - g.create_groups("foo", "bar") - assert 2 == len(g) - - # mode in 'r', 'r+' - open_array("data/array.zarr", shape=100, chunks=10, mode="w") - for mode in "r", "r+": - with pytest.raises(ValueError): - open_group("doesnotexist", mode=mode) - with pytest.raises(ValueError): - open_group("data/array.zarr", mode=mode) - g = open_group(store, mode="r") - assert 
isinstance(g, Group) - assert 2 == len(g) - with pytest.raises(PermissionError): - g.create_group("baz") - g = open_group(store, mode="r+") - assert isinstance(g, Group) - assert 2 == len(g) - g.create_groups("baz", "quux") - assert 4 == len(g) - - # mode == 'a' - shutil.rmtree(store) - g = open_group(store, path=path, mode="a", zarr_version=zarr_version) - assert isinstance(g, Group) - assert isinstance(g.store, expected_store_type) - assert 0 == len(g) - g.create_groups("foo", "bar") - assert 2 == len(g) - if zarr_version == 2: - with pytest.raises(ValueError): - open_group("data/array.zarr", mode="a", zarr_version=zarr_version) - else: - # TODO, root: should this raise an error? - open_group("data/array.zarr", mode="a", zarr_version=zarr_version) - - # mode in 'w-', 'x' - for mode in "w-", "x": - shutil.rmtree(store) - g = open_group(store, path=path, mode=mode, zarr_version=zarr_version) - assert isinstance(g, Group) - assert isinstance(g.store, expected_store_type) - assert 0 == len(g) - g.create_groups("foo", "bar") - assert 2 == len(g) - with pytest.raises(ValueError): - open_group(store, path=path, mode=mode, zarr_version=zarr_version) - if zarr_version == 2: - with pytest.raises(ValueError): - open_group("data/array.zarr", mode=mode) - - # open with path - g = open_group(store, path="foo/bar", zarr_version=zarr_version) - assert isinstance(g, Group) - assert "foo/bar" == g.path - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_group_completions(zarr_version): - path = None if zarr_version == 2 else "group1" - g = group(path=path, zarr_version=zarr_version) - d = dir(g) - assert "foo" not in d - assert "bar" not in d - assert "baz" not in d - assert "qux" not in d - assert "xxx" not in d - assert "yyy" not in d - assert "zzz" not in d - assert "123" not in d - assert "456" not in d - g.create_groups("foo", "bar", "baz/qux", "123") - g.zeros("xxx", shape=100) - g.zeros("yyy", shape=100) - g.zeros("zzz", shape=100) - g.zeros("456", shape=100) - d = dir(g) - assert "foo" in d - assert "bar" in d - assert "baz" in d - assert "qux" not in d - assert "xxx" in d - assert "yyy" in d - assert "zzz" in d - assert "123" not in d # not valid identifier - assert "456" not in d # not valid identifier - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_group_key_completions(zarr_version): - path = None if zarr_version == 2 else "group1" - g = group(path=path, zarr_version=zarr_version) - d = dir(g) - # noinspection PyProtectedMember - k = g._ipython_key_completions_() - - # none of these names should be an attribute - assert "foo" not in d - assert "bar" not in d - assert "baz" not in d - assert "qux" not in d - assert "xxx" not in d - assert "yyy" not in d - assert "zzz" not in d - assert "123" not in d - assert "456" not in d - assert "asdf;" not in d - - # none of these names should be an item - assert "foo" not in k - assert "bar" not in k - assert "baz" not in k - assert "qux" not in k - assert "xxx" not in k - assert "yyy" not in k - assert "zzz" not in k - assert "123" not in k - assert "456" not in k - assert "asdf;" not in k - - g.create_groups("foo", "bar", "baz/qux", "123") - g.zeros("xxx", shape=100) - g.zeros("yyy", shape=100) - g.zeros("zzz", shape=100) - g.zeros("456", shape=100) - if zarr_version == 2: - g.zeros("asdf;", shape=100) - else: - # cannot have ; in key name for v3 - with pytest.raises(ValueError): - g.zeros("asdf;", shape=100) - - d = dir(g) - # noinspection PyProtectedMember - k = g._ipython_key_completions_() - - assert "foo" in d - 
assert "bar" in d - assert "baz" in d - assert "qux" not in d - assert "xxx" in d - assert "yyy" in d - assert "zzz" in d - assert "123" not in d # not valid identifier - assert "456" not in d # not valid identifier - if zarr_version == 2: - assert "asdf;" not in d # not valid identifier - - assert "foo" in k - assert "bar" in k - assert "baz" in k - assert "qux" not in k - assert "xxx" in k - assert "yyy" in k - assert "zzz" in k - assert "123" in k - assert "456" in k - if zarr_version == 2: - assert "asdf;" in k - - -def _check_tree(g, expect_bytes, expect_text): - assert expect_bytes == bytes(g.tree()) - assert expect_text == str(g.tree()) - expect_repr = expect_text - assert expect_repr == repr(g.tree()) - if ipytree: - # noinspection PyProtectedMember - widget = g.tree()._repr_mimebundle_() - isinstance(widget, ipytree.Tree) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -@pytest.mark.parametrize("at_root", [False, True]) -def test_tree(zarr_version, at_root): - # setup - path = None if at_root else "group1" - g1 = group(path=path, zarr_version=zarr_version) - g2 = g1.create_group("foo") - g3 = g1.create_group("bar") - g3.create_group("baz") - g5 = g3.create_group("quux") - g5.create_dataset("baz", shape=100, chunks=10) - - tree_path = "/" if at_root else path - # test root group - if zarr_version == 2: - expect_bytes = textwrap.dedent( - f"""\ - {tree_path} - +-- bar - | +-- baz - | +-- quux - | +-- baz (100,) float64 - +-- foo""" - ).encode() - expect_text = textwrap.dedent( - f"""\ - {tree_path} - ├── bar - │ ├── baz - │ └── quux - │ └── baz (100,) float64 - └── foo""" - ) - else: - # Almost the same as for v2, but has a path name and the - # subgroups are not necessarily sorted alphabetically. - expect_bytes = textwrap.dedent( - f"""\ - {tree_path} - +-- foo - +-- bar - +-- baz - +-- quux - +-- baz (100,) float64""" - ).encode() - expect_text = textwrap.dedent( - f"""\ - {tree_path} - ├── foo - └── bar - ├── baz - └── quux - └── baz (100,) float64""" - ) - _check_tree(g1, expect_bytes, expect_text) - - # test different group - expect_bytes = textwrap.dedent( - """\ - foo""" - ).encode() - expect_text = textwrap.dedent( - """\ - foo""" - ) - _check_tree(g2, expect_bytes, expect_text) - - # test different group - expect_bytes = textwrap.dedent( - """\ - bar - +-- baz - +-- quux - +-- baz (100,) float64""" - ).encode() - expect_text = textwrap.dedent( - """\ - bar - ├── baz - └── quux - └── baz (100,) float64""" - ) - _check_tree(g3, expect_bytes, expect_text) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_group_mismatched_store_versions(): - store_v3 = KVStoreV3(dict()) - store_v2 = KVStore(dict()) - - # separate chunk store - chunk_store_v2 = KVStore(dict()) - chunk_store_v3 = KVStoreV3(dict()) - - init_group(store_v2, path="group1", chunk_store=chunk_store_v2) - init_group(store_v3, path="group1", chunk_store=chunk_store_v3) - - g1_v3 = Group(store_v3, path="group1", read_only=True, chunk_store=chunk_store_v3) - assert isinstance(g1_v3._store, KVStoreV3) - g1_v2 = Group(store_v2, path="group1", read_only=True, chunk_store=chunk_store_v2) - assert isinstance(g1_v2._store, KVStore) - - # store and chunk_store must have the same zarr protocol version - with pytest.raises(ValueError): - Group(store_v3, path="group1", read_only=False, chunk_store=chunk_store_v2) - with pytest.raises(ValueError): - Group(store_v2, path="group1", read_only=False, chunk_store=chunk_store_v3) - with pytest.raises(ValueError): - open_group(store_v2, 
path="group1", chunk_store=chunk_store_v3) - with pytest.raises(ValueError): - open_group(store_v3, path="group1", chunk_store=chunk_store_v2) - - # raises Value if read_only and path is not a pre-existing group - with pytest.raises(ValueError): - Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) - with pytest.raises(ValueError): - Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) - - -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_group_from_paths(zarr_version): - """Verify zarr_version is applied to both the store and chunk_store.""" - store = tempfile.mkdtemp() - chunk_store = tempfile.mkdtemp() - atexit.register(atexit_rmtree, store) - atexit.register(atexit_rmtree, chunk_store) - path = "g1" - g = open_group(store, path=path, chunk_store=chunk_store, zarr_version=zarr_version) - assert g._store._store_version == g._chunk_store._store_version == zarr_version diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py deleted file mode 100644 index a3afc101c5..0000000000 --- a/zarr/tests/test_indexing.py +++ /dev/null @@ -1,1755 +0,0 @@ -import numpy -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import zarr -from zarr.indexing import ( - make_slice_selection, - normalize_integer_selection, - oindex, - oindex_set, - replace_ellipsis, - PartialChunkIterator, -) - -from zarr.tests.util import CountingDict - - -def test_normalize_integer_selection(): - assert 1 == normalize_integer_selection(1, 100) - assert 99 == normalize_integer_selection(-1, 100) - with pytest.raises(IndexError): - normalize_integer_selection(100, 100) - with pytest.raises(IndexError): - normalize_integer_selection(1000, 100) - with pytest.raises(IndexError): - normalize_integer_selection(-1000, 100) - - -def test_replace_ellipsis(): - # 1D, single item - assert (0,) == replace_ellipsis(0, (100,)) - - # 1D - assert (slice(None),) == replace_ellipsis(Ellipsis, (100,)) - assert (slice(None),) == replace_ellipsis(slice(None), (100,)) - assert (slice(None, 100),) == replace_ellipsis(slice(None, 100), (100,)) - assert (slice(0, None),) == replace_ellipsis(slice(0, None), (100,)) - assert (slice(None),) == replace_ellipsis((slice(None), Ellipsis), (100,)) - assert (slice(None),) == replace_ellipsis((Ellipsis, slice(None)), (100,)) - - # 2D, single item - assert (0, 0) == replace_ellipsis((0, 0), (100, 100)) - assert (-1, 1) == replace_ellipsis((-1, 1), (100, 100)) - - # 2D, single col/row - assert (0, slice(None)) == replace_ellipsis((0, slice(None)), (100, 100)) - assert (0, slice(None)) == replace_ellipsis((0,), (100, 100)) - assert (slice(None), 0) == replace_ellipsis((slice(None), 0), (100, 100)) - - # 2D slice - assert (slice(None), slice(None)) == replace_ellipsis(Ellipsis, (100, 100)) - assert (slice(None), slice(None)) == replace_ellipsis(slice(None), (100, 100)) - assert (slice(None), slice(None)) == replace_ellipsis((slice(None), slice(None)), (100, 100)) - assert (slice(None), slice(None)) == replace_ellipsis((Ellipsis, slice(None)), (100, 100)) - assert (slice(None), slice(None)) == replace_ellipsis((slice(None), Ellipsis), (100, 100)) - assert (slice(None), slice(None)) == replace_ellipsis( - (slice(None), Ellipsis, slice(None)), (100, 100) - ) - assert (slice(None), slice(None)) == replace_ellipsis( - (Ellipsis, slice(None), slice(None)), (100, 100) - ) - assert (slice(None), slice(None)) == replace_ellipsis( - (slice(None), slice(None), Ellipsis), (100, 100) - ) - - -def 
test_get_basic_selection_0d(): - # setup - a = np.array(42) - z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) - z[...] = a - - assert_array_equal(a, z.get_basic_selection(Ellipsis)) - assert_array_equal(a, z[...]) - assert 42 == z.get_basic_selection(()) - assert 42 == z[()] - - # test out param - b = np.zeros_like(a) - z.get_basic_selection(Ellipsis, out=b) - assert_array_equal(a, b) - - # test structured array - value = (b"aaa", 1, 4.2) - a = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) - z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) - z[()] = value - assert_array_equal(a, z.get_basic_selection(Ellipsis)) - assert_array_equal(a, z[...]) - assert a[()] == z.get_basic_selection(()) - assert a[()] == z[()] - assert b"aaa" == z.get_basic_selection((), fields="foo") - assert b"aaa" == z["foo"] - assert a[["foo", "bar"]] == z.get_basic_selection((), fields=["foo", "bar"]) - assert a[["foo", "bar"]] == z["foo", "bar"] - # test out param - b = np.zeros_like(a) - z.get_basic_selection(Ellipsis, out=b) - assert_array_equal(a, b) - c = np.zeros_like(a[["foo", "bar"]]) - z.get_basic_selection(Ellipsis, out=c, fields=["foo", "bar"]) - assert_array_equal(a[["foo", "bar"]], c) - - -basic_selections_1d = [ - # single value - 42, - -1, - # slices - slice(0, 1050), - slice(50, 150), - slice(0, 2000), - slice(-150, -50), - slice(-2000, 2000), - slice(0, 0), # empty result - slice(-1, 0), # empty result - # total selections - slice(None), - Ellipsis, - (), - (Ellipsis, slice(None)), - # slice with step - slice(None), - slice(None, None), - slice(None, None, 1), - slice(None, None, 10), - slice(None, None, 100), - slice(None, None, 1000), - slice(None, None, 10000), - slice(0, 1050), - slice(0, 1050, 1), - slice(0, 1050, 10), - slice(0, 1050, 100), - slice(0, 1050, 1000), - slice(0, 1050, 10000), - slice(1, 31, 3), - slice(1, 31, 30), - slice(1, 31, 300), - slice(81, 121, 3), - slice(81, 121, 30), - slice(81, 121, 300), - slice(50, 150), - slice(50, 150, 1), - slice(50, 150, 10), -] - - -basic_selections_1d_bad = [ - # only positive step supported - slice(None, None, -1), - slice(None, None, -10), - slice(None, None, -100), - slice(None, None, -1000), - slice(None, None, -10000), - slice(1050, -1, -1), - slice(1050, -1, -10), - slice(1050, -1, -100), - slice(1050, -1, -1000), - slice(1050, -1, -10000), - slice(1050, 0, -1), - slice(1050, 0, -10), - slice(1050, 0, -100), - slice(1050, 0, -1000), - slice(1050, 0, -10000), - slice(150, 50, -1), - slice(150, 50, -10), - slice(31, 1, -3), - slice(121, 81, -3), - slice(-1, 0, -1), - # bad stuff - 2.3, - "foo", - b"xxx", - None, - (0, 0), - (slice(None), slice(None)), -] - - -def _test_get_basic_selection(a, z, selection): - expect = a[selection] - actual = z.get_basic_selection(selection) - assert_array_equal(expect, actual) - actual = z[selection] - assert_array_equal(expect, actual) - - -# noinspection PyStatementEffect -def test_get_basic_selection_1d(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - for selection in basic_selections_1d: - _test_get_basic_selection(a, z, selection) - - for selection in basic_selections_1d_bad: - with pytest.raises(IndexError): - z.get_basic_selection(selection) - with pytest.raises(IndexError): - z[selection] - - with pytest.raises(IndexError): - z.get_basic_selection([1, 0]) - - -basic_selections_2d = [ - # single row - 42, - -1, - (42, slice(None)), - (-1, slice(None)), - # single col - (slice(None), 
4), - (slice(None), -1), - # row slices - slice(None), - slice(0, 1000), - slice(250, 350), - slice(0, 2000), - slice(-350, -250), - slice(0, 0), # empty result - slice(-1, 0), # empty result - slice(-2000, 0), - slice(-2000, 2000), - # 2D slices - (slice(None), slice(1, 5)), - (slice(250, 350), slice(None)), - (slice(250, 350), slice(1, 5)), - (slice(250, 350), slice(-5, -1)), - (slice(250, 350), slice(-50, 50)), - (slice(250, 350, 10), slice(1, 5)), - (slice(250, 350), slice(1, 5, 2)), - (slice(250, 350, 33), slice(1, 5, 3)), - # total selections - (slice(None), slice(None)), - Ellipsis, - (), - (Ellipsis, slice(None)), - (Ellipsis, slice(None), slice(None)), -] - - -basic_selections_2d_bad = [ - # bad stuff - 2.3, - "foo", - b"xxx", - None, - (2.3, slice(None)), - # only positive step supported - slice(None, None, -1), - (slice(None, None, -1), slice(None)), - (0, 0, 0), - (slice(None), slice(None), slice(None)), -] - - -# noinspection PyStatementEffect -def test_get_basic_selection_2d(): - # setup - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - - for selection in basic_selections_2d: - _test_get_basic_selection(a, z, selection) - - bad_selections = basic_selections_2d_bad + [ - # integer arrays - [0, 1], - (slice(None), [0, 1]), - ] - for selection in bad_selections: - with pytest.raises(IndexError): - z.get_basic_selection(selection) - # check fallback on fancy indexing - fancy_selection = ([0, 1], [0, 1]) - np.testing.assert_array_equal(z[fancy_selection], [0, 11]) - - -def test_fancy_indexing_fallback_on_get_setitem(): - z = zarr.zeros((20, 20)) - z[[1, 2, 3], [1, 2, 3]] = 1 - np.testing.assert_array_equal( - z[:4, :4], - [ - [0, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1], - ], - ) - np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) - # test broadcasting - np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) - # test 1D fancy indexing - z2 = zarr.zeros(5) - z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) - - -@pytest.mark.parametrize( - "index,expected_result", - [ - # Single iterable of integers - ([0, 1], [[0, 1, 2], [3, 4, 5]]), - # List first, then slice - (([0, 1], slice(None)), [[0, 1, 2], [3, 4, 5]]), - # List first, then slice - (([0, 1], slice(1, None)), [[1, 2], [4, 5]]), - # Slice first, then list - ((slice(0, 2), [0, 2]), [[0, 2], [3, 5]]), - # Slices only - ((slice(0, 2), slice(0, 2)), [[0, 1], [3, 4]]), - # List with repeated index - (([1, 0, 1], slice(1, None)), [[4, 5], [1, 2], [4, 5]]), - # 1D indexing - (([1, 0, 1]), [[3, 4, 5], [0, 1, 2], [3, 4, 5]]), - ], -) -def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): - """ - Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. - - In addition to checking expected behavior, all indexing - is also checked against numpy. 
- """ - # [0, 1, 2], - # [3, 4, 5], - # [6, 7, 8] - a = np.arange(9).reshape(3, 3) - z = zarr.array(a) - - np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") - np.testing.assert_array_equal(z[index], expected_result) - - -@pytest.mark.parametrize( - "index,expected_result", - [ - # Single iterable of integers - ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), - # One slice, two integers - ((slice(0, 2), 1, 1), [4, 13]), - # One integer, two slices - ((slice(0, 2), 1, slice(0, 2)), [[3, 4], [12, 13]]), - # Two slices and a list - ((slice(0, 2), [1, 2], slice(0, 2)), [[[3, 4], [6, 7]], [[12, 13], [15, 16]]]), - ], -) -def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): - """ - Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. - - In addition to checking expected behavior, all indexing - is also checked against numpy. - """ - # [[[ 0, 1, 2], - # [ 3, 4, 5], - # [ 6, 7, 8]], - - # [[ 9, 10, 11], - # [12, 13, 14], - # [15, 16, 17]], - - # [[18, 19, 20], - # [21, 22, 23], - # [24, 25, 26]]] - a = np.arange(27).reshape(3, 3, 3) - z = zarr.array(a) - - np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") - np.testing.assert_array_equal(z[index], expected_result) - - -@pytest.mark.parametrize( - "index,expected_result", - [ - # Single iterable of integers - ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), - # List and slice combined - (([0, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), - # Index repetition is ignored on setitem - (([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), - # Slice with step - (([0, 2], slice(None, None, 2)), [[1, 0, 1], [0, 0, 0], [1, 0, 1]]), - ], -) -def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): - """ - Tests the orthogonal indexing fallback on __setitem__ for a 3D matrix. - - In addition to checking expected behavior, all indexing - is also checked against numpy. - """ - # Slice + fancy index - a = np.zeros((3, 3)) - z = zarr.array(a) - z[index] = 1 - a[index] = 1 - np.testing.assert_array_equal(z, expected_result) - np.testing.assert_array_equal(z, a, err_msg="Indexing disagrees with numpy") - - -def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): - z2 = zarr.zeros((5, 5, 5)) - with pytest.raises(IndexError): - z2[[1, 2, 3], [1, 2, 3]] = 2 - with pytest.raises(IndexError): - np.testing.assert_array_equal(z2[[1, 2, 3], [1, 2, 3]], 0) - with pytest.raises(IndexError): - z2[..., [1, 2, 3]] = 2 - with pytest.raises(IndexError): - np.testing.assert_array_equal(z2[..., [1, 2, 3]], 0) - - -def test_set_basic_selection_0d(): - # setup - v = np.array(42) - a = np.zeros_like(v) - z = zarr.zeros_like(v) - assert_array_equal(a, z) - - # tests - z.set_basic_selection(Ellipsis, v) - assert_array_equal(v, z) - z[...] = 0 - assert_array_equal(a, z) - z[...] = v - assert_array_equal(v, z) - - # test structured array - value = (b"aaa", 1, 4.2) - v = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) - a = np.zeros_like(v) - z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) - - # tests - z.set_basic_selection(Ellipsis, v) - assert_array_equal(v, z) - z.set_basic_selection(Ellipsis, a) - assert_array_equal(a, z) - z[...] = v - assert_array_equal(v, z) - z[...] 
= a - assert_array_equal(a, z) - # with fields - z.set_basic_selection(Ellipsis, v["foo"], fields="foo") - assert v["foo"] == z["foo"] - assert a["bar"] == z["bar"] - assert a["baz"] == z["baz"] - z["bar"] = v["bar"] - assert v["foo"] == z["foo"] - assert v["bar"] == z["bar"] - assert a["baz"] == z["baz"] - # multiple field assignment not supported - with pytest.raises(IndexError): - z.set_basic_selection(Ellipsis, v[["foo", "bar"]], fields=["foo", "bar"]) - with pytest.raises(IndexError): - z[..., "foo", "bar"] = v[["foo", "bar"]] - - -def _test_get_orthogonal_selection(a, z, selection): - expect = oindex(a, selection) - actual = z.get_orthogonal_selection(selection) - assert_array_equal(expect, actual) - actual = z.oindex[selection] - assert_array_equal(expect, actual) - - -# noinspection PyStatementEffect -def test_get_orthogonal_selection_1d_bool(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - _test_get_orthogonal_selection(a, z, ix) - - # test errors - with pytest.raises(IndexError): - z.oindex[np.zeros(50, dtype=bool)] # too short - with pytest.raises(IndexError): - z.oindex[np.zeros(2000, dtype=bool)] # too long - with pytest.raises(IndexError): - z.oindex[[[True, False], [False, True]]] # too many dimensions - - -# noinspection PyStatementEffect -def test_get_orthogonal_selection_1d_int(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 2, 0.5, 0.1, 0.01: - # unordered - ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - _test_get_orthogonal_selection(a, z, ix) - # increasing - ix.sort() - _test_get_orthogonal_selection(a, z, ix) - # decreasing - ix = ix[::-1] - _test_get_orthogonal_selection(a, z, ix) - - selections = basic_selections_1d + [ - # test wraparound - [0, 3, 10, -23, -12, -1], - # explicit test not sorted - [3, 105, 23, 127], - ] - for selection in selections: - _test_get_orthogonal_selection(a, z, selection) - - bad_selections = basic_selections_1d_bad + [ - [a.shape[0] + 1], # out of bounds - [-(a.shape[0] + 1)], # out of bounds - [[2, 4], [6, 8]], # too many dimensions - ] - for selection in bad_selections: - with pytest.raises(IndexError): - z.get_orthogonal_selection(selection) - with pytest.raises(IndexError): - z.oindex[selection] - - -def _test_get_orthogonal_selection_2d(a, z, ix0, ix1): - selections = [ - # index both axes with array - (ix0, ix1), - # mixed indexing with array / slice - (ix0, slice(1, 5)), - (ix0, slice(1, 5, 2)), - (slice(250, 350), ix1), - (slice(250, 350, 10), ix1), - # mixed indexing with array / int - (ix0, 4), - (42, ix1), - ] - for selection in selections: - _test_get_orthogonal_selection(a, z, selection) - - -# noinspection PyStatementEffect -def test_get_orthogonal_selection_2d(): - # setup - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - # boolean arrays - ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) - _test_get_orthogonal_selection_2d(a, z, ix0, ix1) - - # mixed int array / bool array - 
selections = ( - (ix0, np.nonzero(ix1)[0]), - (np.nonzero(ix0)[0], ix1), - ) - for selection in selections: - _test_get_orthogonal_selection(a, z, selection) - - # integer arrays - ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) - _test_get_orthogonal_selection_2d(a, z, ix0, ix1) - ix0.sort() - ix1.sort() - _test_get_orthogonal_selection_2d(a, z, ix0, ix1) - ix0 = ix0[::-1] - ix1 = ix1[::-1] - _test_get_orthogonal_selection_2d(a, z, ix0, ix1) - - for selection in basic_selections_2d: - _test_get_orthogonal_selection(a, z, selection) - - for selection in basic_selections_2d_bad: - with pytest.raises(IndexError): - z.get_orthogonal_selection(selection) - with pytest.raises(IndexError): - z.oindex[selection] - - -def _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2): - selections = [ - # single value - (84, 42, 4), - (-1, -1, -1), - # index all axes with array - (ix0, ix1, ix2), - # mixed indexing with single array / slices - (ix0, slice(15, 25), slice(1, 5)), - (slice(50, 70), ix1, slice(1, 5)), - (slice(50, 70), slice(15, 25), ix2), - (ix0, slice(15, 25, 5), slice(1, 5, 2)), - (slice(50, 70, 3), ix1, slice(1, 5, 2)), - (slice(50, 70, 3), slice(15, 25, 5), ix2), - # mixed indexing with single array / ints - (ix0, 42, 4), - (84, ix1, 4), - (84, 42, ix2), - # mixed indexing with single array / slice / int - (ix0, slice(15, 25), 4), - (42, ix1, slice(1, 5)), - (slice(50, 70), 42, ix2), - # mixed indexing with two array / slice - (ix0, ix1, slice(1, 5)), - (slice(50, 70), ix1, ix2), - (ix0, slice(15, 25), ix2), - # mixed indexing with two array / integer - (ix0, ix1, 4), - (42, ix1, ix2), - (ix0, 42, ix2), - ] - for selection in selections: - _test_get_orthogonal_selection(a, z, selection) - - -def test_get_orthogonal_selection_3d(): - # setup - a = np.arange(100000, dtype=int).reshape(200, 50, 10) - z = zarr.create(shape=a.shape, chunks=(60, 20, 3), dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - # boolean arrays - ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) - ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) - _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) - - # integer arrays - ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) - _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) - ix0.sort() - ix1.sort() - ix2.sort() - _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) - ix0 = ix0[::-1] - ix1 = ix1[::-1] - ix2 = ix2[::-1] - _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) - - -def test_orthogonal_indexing_edge_cases(): - a = np.arange(6).reshape(1, 2, 3) - z = zarr.create(shape=a.shape, chunks=(1, 2, 3), dtype=a.dtype) - z[:] = a - - expect = oindex(a, (0, slice(None), [0, 1, 2])) - actual = z.oindex[0, :, [0, 1, 2]] - assert_array_equal(expect, actual) - - expect = oindex(a, (0, slice(None), [True, True, True])) - actual = z.oindex[0, :, [True, True, True]] - assert_array_equal(expect, actual) - - -def _test_set_orthogonal_selection(v, a, z, selection): - for value in 42, oindex(v, selection), oindex(v, selection).tolist(): - if isinstance(value, list) and value == []: - # skip these cases as cannot preserve all 
dimensions - continue - # setup expectation - a[:] = 0 - oindex_set(a, selection, value) - # long-form API - z[:] = 0 - z.set_orthogonal_selection(selection, value) - assert_array_equal(a, z[:]) - # short-form API - z[:] = 0 - z.oindex[selection] = value - assert_array_equal(a, z[:]) - - -def test_set_orthogonal_selection_1d(): - # setup - v = np.arange(1050, dtype=int) - a = np.empty(v.shape, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - - # test with different degrees of sparseness - np.random.seed(42) - for p in 0.5, 0.1, 0.01: - # boolean arrays - ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - _test_set_orthogonal_selection(v, a, z, ix) - - # integer arrays - ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - _test_set_orthogonal_selection(v, a, z, ix) - ix.sort() - _test_set_orthogonal_selection(v, a, z, ix) - ix = ix[::-1] - _test_set_orthogonal_selection(v, a, z, ix) - - # basic selections - for selection in basic_selections_1d: - _test_set_orthogonal_selection(v, a, z, selection) - - -def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): - selections = [ - # index both axes with array - (ix0, ix1), - # mixed indexing with array / slice or int - (ix0, slice(1, 5)), - (slice(250, 350), ix1), - (ix0, 4), - (42, ix1), - ] - for selection in selections: - _test_set_orthogonal_selection(v, a, z, selection) - - -def test_set_orthogonal_selection_2d(): - # setup - v = np.arange(10000, dtype=int).reshape(1000, 10) - a = np.empty_like(v) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - # boolean arrays - ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) - _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) - - # integer arrays - ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) - _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) - ix0.sort() - ix1.sort() - _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) - ix0 = ix0[::-1] - ix1 = ix1[::-1] - _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) - - for selection in basic_selections_2d: - _test_set_orthogonal_selection(v, a, z, selection) - - -def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): - selections = ( - # single value - (84, 42, 4), - (-1, -1, -1), - # index all axes with bool array - (ix0, ix1, ix2), - # mixed indexing with single bool array / slice or int - (ix0, slice(15, 25), slice(1, 5)), - (slice(50, 70), ix1, slice(1, 5)), - (slice(50, 70), slice(15, 25), ix2), - (ix0, 42, 4), - (84, ix1, 4), - (84, 42, ix2), - (ix0, slice(15, 25), 4), - (slice(50, 70), ix1, 4), - (slice(50, 70), 42, ix2), - # indexing with two arrays / slice - (ix0, ix1, slice(1, 5)), - # indexing with two arrays / integer - (ix0, ix1, 4), - ) - for selection in selections: - _test_set_orthogonal_selection(v, a, z, selection) - - -def test_set_orthogonal_selection_3d(): - # setup - v = np.arange(100000, dtype=int).reshape(200, 50, 10) - a = np.empty_like(v) - z = zarr.create(shape=a.shape, chunks=(60, 20, 3), dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - # boolean arrays - ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) - ix2 = 
np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) - _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) - - # integer arrays - ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) - _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) - - # sorted increasing - ix0.sort() - ix1.sort() - ix2.sort() - _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) - - # sorted decreasing - ix0 = ix0[::-1] - ix1 = ix1[::-1] - ix2 = ix2[::-1] - _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) - - -def test_orthogonal_indexing_fallback_on_get_setitem(): - z = zarr.zeros((20, 20)) - z[[1, 2, 3], [1, 2, 3]] = 1 - np.testing.assert_array_equal( - z[:4, :4], - [ - [0, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1], - ], - ) - np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) - # test broadcasting - np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) - # test 1D fancy indexing - z2 = zarr.zeros(5) - z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) - - -def _test_get_coordinate_selection(a, z, selection): - expect = a[selection] - actual = z.get_coordinate_selection(selection) - assert_array_equal(expect, actual) - actual = z.vindex[selection] - assert_array_equal(expect, actual) - - -coordinate_selections_1d_bad = [ - # slice not supported - slice(5, 15), - slice(None), - Ellipsis, - # bad stuff - 2.3, - "foo", - b"xxx", - None, - (0, 0), - (slice(None), slice(None)), -] - - -# noinspection PyStatementEffect -def test_get_coordinate_selection_1d(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 2, 0.5, 0.1, 0.01: - n = int(a.size * p) - ix = np.random.choice(a.shape[0], size=n, replace=True) - _test_get_coordinate_selection(a, z, ix) - ix.sort() - _test_get_coordinate_selection(a, z, ix) - ix = ix[::-1] - _test_get_coordinate_selection(a, z, ix) - - selections = [ - # test single item - 42, - -1, - # test wraparound - [0, 3, 10, -23, -12, -1], - # test out of order - [3, 105, 23, 127], # not monotonically increasing - # test multi-dimensional selection - np.array([[2, 4], [6, 8]]), - ] - for selection in selections: - _test_get_coordinate_selection(a, z, selection) - - # test errors - bad_selections = coordinate_selections_1d_bad + [ - [a.shape[0] + 1], # out of bounds - [-(a.shape[0] + 1)], # out of bounds - ] - for selection in bad_selections: - with pytest.raises(IndexError): - z.get_coordinate_selection(selection) - with pytest.raises(IndexError): - z.vindex[selection] - - -def test_get_coordinate_selection_2d(): - # setup - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 2, 0.5, 0.1, 0.01: - n = int(a.size * p) - ix0 = np.random.choice(a.shape[0], size=n, replace=True) - ix1 = np.random.choice(a.shape[1], size=n, replace=True) - selections = [ - # single value - (42, 4), - (-1, -1), - # index both axes with array - (ix0, ix1), - # mixed indexing with array / int - (ix0, 4), - (42, ix1), - (42, 4), - ] - for selection in selections: - _test_get_coordinate_selection(a, z, selection) - - # not monotonically increasing (first dim) - ix0 = [3, 3, 4, 2, 5] - 
ix1 = [1, 3, 5, 7, 9] - _test_get_coordinate_selection(a, z, (ix0, ix1)) - - # not monotonically increasing (second dim) - ix0 = [1, 1, 2, 2, 5] - ix1 = [1, 3, 2, 1, 0] - _test_get_coordinate_selection(a, z, (ix0, ix1)) - - # multi-dimensional selection - ix0 = np.array([[1, 1, 2], [2, 2, 5]]) - ix1 = np.array([[1, 3, 2], [1, 0, 0]]) - _test_get_coordinate_selection(a, z, (ix0, ix1)) - - with pytest.raises(IndexError): - selection = slice(5, 15), [1, 2, 3] - z.get_coordinate_selection(selection) - with pytest.raises(IndexError): - selection = [1, 2, 3], slice(5, 15) - z.get_coordinate_selection(selection) - with pytest.raises(IndexError): - selection = Ellipsis, [1, 2, 3] - z.get_coordinate_selection(selection) - with pytest.raises(IndexError): - selection = Ellipsis - z.get_coordinate_selection(selection) - - -def _test_set_coordinate_selection(v, a, z, selection): - for value in 42, v[selection], v[selection].tolist(): - # setup expectation - a[:] = 0 - a[selection] = value - # test long-form API - z[:] = 0 - z.set_coordinate_selection(selection, value) - assert_array_equal(a, z[:]) - # test short-form API - z[:] = 0 - z.vindex[selection] = value - assert_array_equal(a, z[:]) - - -def test_set_coordinate_selection_1d(): - # setup - v = np.arange(1050, dtype=int) - a = np.empty(v.shape, dtype=v.dtype) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 2, 0.5, 0.1, 0.01: - n = int(a.size * p) - ix = np.random.choice(a.shape[0], size=n, replace=True) - _test_set_coordinate_selection(v, a, z, ix) - - # multi-dimensional selection - ix = np.array([[2, 4], [6, 8]]) - _test_set_coordinate_selection(v, a, z, ix) - - for selection in coordinate_selections_1d_bad: - with pytest.raises(IndexError): - z.set_coordinate_selection(selection, 42) - with pytest.raises(IndexError): - z.vindex[selection] = 42 - - -def test_set_coordinate_selection_2d(): - # setup - v = np.arange(10000, dtype=int).reshape(1000, 10) - a = np.empty_like(v) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 2, 0.5, 0.1, 0.01: - n = int(a.size * p) - ix0 = np.random.choice(a.shape[0], size=n, replace=True) - ix1 = np.random.choice(a.shape[1], size=n, replace=True) - - selections = ( - (42, 4), - (-1, -1), - # index both axes with array - (ix0, ix1), - # mixed indexing with array / int - (ix0, 4), - (42, ix1), - ) - for selection in selections: - _test_set_coordinate_selection(v, a, z, selection) - - # multi-dimensional selection - ix0 = np.array([[1, 2, 3], [4, 5, 6]]) - ix1 = np.array([[1, 3, 2], [2, 0, 5]]) - _test_set_coordinate_selection(v, a, z, (ix0, ix1)) - - -def _test_get_block_selection(a, z, selection, expected_idx): - expect = a[expected_idx] - actual = z.get_block_selection(selection) - assert_array_equal(expect, actual) - actual = z.blocks[selection] - assert_array_equal(expect, actual) - - -block_selections_1d = [ - # test single item - 0, - 5, - # test wraparound - -1, - -4, - # test slice - slice(5), - slice(None, 3), - slice(5, 6), - slice(-3, -1), - slice(None), # Full slice -] - -block_selections_1d_array_projection = [ - # test single item - slice(100), - slice(500, 600), - # test wraparound - slice(1000, None), - slice(700, 800), - # test slice - slice(500), - slice(None, 300), - slice(500, 600), - slice(800, 1000), - slice(None), -] - -block_selections_1d_bad = [ - # slice not supported - slice(3, 8, 2), - # bad stuff - 2.3, 
- "foo", - b"xxx", - None, - (0, 0), - (slice(None), slice(None)), - [0, 5, 3], -] - - -def test_get_block_selection_1d(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): - _test_get_block_selection(a, z, selection, expected_idx) - - bad_selections = block_selections_1d_bad + [ - z.nchunks + 1, # out of bounds - -(z.nchunks + 1), # out of bounds - ] - - for selection in bad_selections: - with pytest.raises(IndexError): - z.get_block_selection(selection) - with pytest.raises(IndexError): - z.blocks[selection] - - -block_selections_2d = [ - # test single item - (0, 0), - (1, 2), - # test wraparound - (-1, -1), - (-3, -2), - # test slice - (slice(1), slice(2)), - (slice(None, 2), slice(-2, -1)), - (slice(2, 3), slice(-2, None)), - (slice(-3, -1), slice(-3, -2)), - (slice(None), slice(None)), # Full slice -] - -block_selections_2d_array_projection = [ - # test single item - (slice(300), slice(3)), - (slice(300, 600), slice(6, 9)), - # test wraparound - (slice(900, None), slice(9, None)), - (slice(300, 600), slice(6, 9)), - # test slice - (slice(300), slice(6)), - (slice(None, 600), slice(6, 9)), - (slice(600, 900), slice(6, None)), - (slice(300, 900), slice(3, 6)), - (slice(None), slice(None)), # Full slice -] - - -def test_get_block_selection_2d(): - # setup - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - - for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): - _test_get_block_selection(a, z, selection, expected_idx) - - with pytest.raises(IndexError): - selection = slice(5, 15), [1, 2, 3] - z.get_block_selection(selection) - with pytest.raises(IndexError): - selection = Ellipsis, [1, 2, 3] - z.get_block_selection(selection) - with pytest.raises(IndexError): # out of bounds - selection = slice(15, 20), slice(None) - z.get_block_selection(selection) - - -def _test_set_block_selection(v: np.ndarray, a: np.ndarray, z: zarr.Array, selection, expected_idx): - for value in 42, v[expected_idx], v[expected_idx].tolist(): - # setup expectation - a[:] = 0 - a[expected_idx] = value - # test long-form API - z[:] = 0 - z.set_block_selection(selection, value) - assert_array_equal(a, z[:]) - # test short-form API - z[:] = 0 - z.blocks[selection] = value - assert_array_equal(a, z[:]) - - -def test_set_block_selection_1d(): - # setup - v = np.arange(1050, dtype=int) - a = np.empty(v.shape, dtype=v.dtype) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - - for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): - _test_set_block_selection(v, a, z, selection, expected_idx) - - for selection in block_selections_1d_bad: - with pytest.raises(IndexError): - z.set_block_selection(selection, 42) - with pytest.raises(IndexError): - z.blocks[selection] = 42 - - -def test_set_block_selection_2d(): - # setup - v = np.arange(10000, dtype=int).reshape(1000, 10) - a = np.empty(v.shape, dtype=v.dtype) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - - for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): - _test_set_block_selection(v, a, z, selection, expected_idx) - - with pytest.raises(IndexError): - selection = slice(5, 15), [1, 2, 3] - z.set_block_selection(selection, 42) - with pytest.raises(IndexError): - selection = Ellipsis, 
[1, 2, 3] - z.set_block_selection(selection, 42) - with pytest.raises(IndexError): # out of bounds - selection = slice(15, 20), slice(None) - z.set_block_selection(selection, 42) - - -def _test_get_mask_selection(a, z, selection): - expect = a[selection] - actual = z.get_mask_selection(selection) - assert_array_equal(expect, actual) - actual = z.vindex[selection] - assert_array_equal(expect, actual) - - -mask_selections_1d_bad = [ - # slice not supported - slice(5, 15), - slice(None), - Ellipsis, - # bad stuff - 2.3, - "foo", - b"xxx", - None, - (0, 0), - (slice(None), slice(None)), -] - - -# noinspection PyStatementEffect -def test_get_mask_selection_1d(): - # setup - a = np.arange(1050, dtype=int) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - _test_get_mask_selection(a, z, ix) - - # test errors - bad_selections = mask_selections_1d_bad + [ - np.zeros(50, dtype=bool), # too short - np.zeros(2000, dtype=bool), # too long - [[True, False], [False, True]], # too many dimensions - ] - for selection in bad_selections: - with pytest.raises(IndexError): - z.get_mask_selection(selection) - with pytest.raises(IndexError): - z.vindex[selection] - - -# noinspection PyStatementEffect -def test_get_mask_selection_2d(): - # setup - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) - _test_get_mask_selection(a, z, ix) - - # test errors - with pytest.raises(IndexError): - z.vindex[np.zeros((1000, 5), dtype=bool)] # too short - with pytest.raises(IndexError): - z.vindex[np.zeros((2000, 10), dtype=bool)] # too long - with pytest.raises(IndexError): - z.vindex[[True, False]] # wrong no. 
dimensions - - -def _test_set_mask_selection(v, a, z, selection): - a[:] = 0 - z[:] = 0 - a[selection] = v[selection] - z.set_mask_selection(selection, v[selection]) - assert_array_equal(a, z[:]) - z[:] = 0 - z.vindex[selection] = v[selection] - assert_array_equal(a, z[:]) - - -def test_set_mask_selection_1d(): - # setup - v = np.arange(1050, dtype=int) - a = np.empty_like(v) - z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - _test_set_mask_selection(v, a, z, ix) - - for selection in mask_selections_1d_bad: - with pytest.raises(IndexError): - z.set_mask_selection(selection, 42) - with pytest.raises(IndexError): - z.vindex[selection] = 42 - - -def test_set_mask_selection_2d(): - # setup - v = np.arange(10000, dtype=int).reshape(1000, 10) - a = np.empty_like(v) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) - _test_set_mask_selection(v, a, z, ix) - - -def test_get_selection_out(): - # basic selections - a = np.arange(1050) - z = zarr.create(shape=1050, chunks=100, dtype=a.dtype) - z[:] = a - selections = [ - slice(50, 150), - slice(0, 1050), - slice(1, 2), - ] - for selection in selections: - expect = a[selection] - out = zarr.create(shape=expect.shape, chunks=10, dtype=expect.dtype, fill_value=0) - z.get_basic_selection(selection, out=out) - assert_array_equal(expect, out[:]) - - with pytest.raises(TypeError): - z.get_basic_selection(Ellipsis, out=[]) - - # orthogonal selections - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) - selections = [ - # index both axes with array - (ix0, ix1), - # mixed indexing with array / slice - (ix0, slice(1, 5)), - (slice(250, 350), ix1), - # mixed indexing with array / int - (ix0, 4), - (42, ix1), - # mixed int array / bool array - (ix0, np.nonzero(ix1)[0]), - (np.nonzero(ix0)[0], ix1), - ] - for selection in selections: - expect = oindex(a, selection) - # out = zarr.create(shape=expect.shape, chunks=10, dtype=expect.dtype, - # fill_value=0) - out = np.zeros(expect.shape, dtype=expect.dtype) - z.get_orthogonal_selection(selection, out=out) - assert_array_equal(expect, out[:]) - - # coordinate selections - a = np.arange(10000, dtype=int).reshape(1000, 10) - z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - z[:] = a - np.random.seed(42) - # test with different degrees of sparseness - for p in 0.5, 0.1, 0.01: - n = int(a.size * p) - ix0 = np.random.choice(a.shape[0], size=n, replace=True) - ix1 = np.random.choice(a.shape[1], size=n, replace=True) - selections = [ - # index both axes with array - (ix0, ix1), - # mixed indexing with array / int - (ix0, 4), - (42, ix1), - ] - for selection in selections: - expect = a[selection] - out = np.zeros(expect.shape, dtype=expect.dtype) - z.get_coordinate_selection(selection, out=out) - assert_array_equal(expect, out[:]) - - -def test_get_selections_with_fields(): - a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] - a = np.array(a, dtype=[("foo", 
"S3"), ("bar", "i4"), ("baz", "f8")]) - z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) - z[:] = a - - fields_fixture = [ - "foo", - ["foo"], - ["foo", "bar"], - ["foo", "baz"], - ["bar", "baz"], - ["foo", "bar", "baz"], - ["bar", "foo"], - ["baz", "bar", "foo"], - ] - - for fields in fields_fixture: - # total selection - expect = a[fields] - actual = z.get_basic_selection(Ellipsis, fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z[fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z[fields[0], fields[1]] - assert_array_equal(expect, actual) - if isinstance(fields, str): - actual = z[..., fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z[..., fields[0], fields[1]] - assert_array_equal(expect, actual) - - # basic selection with slice - expect = a[fields][0:2] - actual = z.get_basic_selection(slice(0, 2), fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z[0:2, fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z[0:2, fields[0], fields[1]] - assert_array_equal(expect, actual) - - # basic selection with single item - expect = a[fields][1] - actual = z.get_basic_selection(1, fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z[1, fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z[1, fields[0], fields[1]] - assert_array_equal(expect, actual) - - # orthogonal selection - ix = [0, 2] - expect = a[fields][ix] - actual = z.get_orthogonal_selection(ix, fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z.oindex[ix, fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z.oindex[ix, fields[0], fields[1]] - assert_array_equal(expect, actual) - - # coordinate selection - ix = [0, 2] - expect = a[fields][ix] - actual = z.get_coordinate_selection(ix, fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z.vindex[ix, fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z.vindex[ix, fields[0], fields[1]] - assert_array_equal(expect, actual) - - # mask selection - ix = [True, False, True] - expect = a[fields][ix] - actual = z.get_mask_selection(ix, fields=fields) - assert_array_equal(expect, actual) - # alternative API - if isinstance(fields, str): - actual = z.vindex[ix, fields] - assert_array_equal(expect, actual) - elif len(fields) == 2: - actual = z.vindex[ix, fields[0], fields[1]] - assert_array_equal(expect, actual) - - # missing/bad fields - with pytest.raises(IndexError): - z.get_basic_selection(Ellipsis, fields=["notafield"]) - with pytest.raises(IndexError): - z.get_basic_selection(Ellipsis, fields=slice(None)) - - -def test_set_selections_with_fields(): - v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] - v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) - a = np.empty_like(v) - z = zarr.empty_like(v, chunks=2) - - fields_fixture = [ - "foo", - [], - ["foo"], - ["foo", "bar"], - ["foo", "baz"], - ["bar", "baz"], - ["foo", "bar", "baz"], - ["bar", "foo"], - ["baz", "bar", "foo"], - ] - - for fields in fields_fixture: - # currently multi-field assignment is not supported in numpy, so we won't support - # it either - if isinstance(fields, list) and len(fields) > 1: - 
with pytest.raises(IndexError): - z.set_basic_selection(Ellipsis, v, fields=fields) - with pytest.raises(IndexError): - z.set_orthogonal_selection([0, 2], v, fields=fields) - with pytest.raises(IndexError): - z.set_coordinate_selection([0, 2], v, fields=fields) - with pytest.raises(IndexError): - z.set_mask_selection([True, False, True], v, fields=fields) - - else: - if isinstance(fields, list) and len(fields) == 1: - # work around numpy does not support multi-field assignment even if there - # is only one field - key = fields[0] - elif isinstance(fields, list) and len(fields) == 0: - # work around numpy ambiguity about what is a field selection - key = Ellipsis - else: - key = fields - - # setup expectation - a[:] = ("", 0, 0) - z[:] = ("", 0, 0) - assert_array_equal(a, z[:]) - a[key] = v[key] - # total selection - z.set_basic_selection(Ellipsis, v[key], fields=fields) - assert_array_equal(a, z[:]) - - # basic selection with slice - a[:] = ("", 0, 0) - z[:] = ("", 0, 0) - a[key][0:2] = v[key][0:2] - z.set_basic_selection(slice(0, 2), v[key][0:2], fields=fields) - assert_array_equal(a, z[:]) - - # orthogonal selection - a[:] = ("", 0, 0) - z[:] = ("", 0, 0) - ix = [0, 2] - a[key][ix] = v[key][ix] - z.set_orthogonal_selection(ix, v[key][ix], fields=fields) - assert_array_equal(a, z[:]) - - # coordinate selection - a[:] = ("", 0, 0) - z[:] = ("", 0, 0) - ix = [0, 2] - a[key][ix] = v[key][ix] - z.set_coordinate_selection(ix, v[key][ix], fields=fields) - assert_array_equal(a, z[:]) - - # mask selection - a[:] = ("", 0, 0) - z[:] = ("", 0, 0) - ix = [True, False, True] - a[key][ix] = v[key][ix] - z.set_mask_selection(ix, v[key][ix], fields=fields) - assert_array_equal(a, z[:]) - - -@pytest.mark.parametrize( - "selection, arr, expected", - [ - ( - (slice(5, 8, 1), slice(2, 4, 1), slice(0, 100, 1)), - np.arange(2, 100_002).reshape((100, 10, 100)), - [ - (5200, 200, (slice(5, 6, 1), slice(2, 4, 1))), - (6200, 200, (slice(6, 7, 1), slice(2, 4, 1))), - (7200, 200, (slice(7, 8, 1), slice(2, 4, 1))), - ], - ), - ( - (slice(5, 8, 1), slice(2, 4, 1), slice(0, 5, 1)), - np.arange(2, 100_002).reshape((100, 10, 100)), - [ - (5200.0, 5.0, (slice(5, 6, 1), slice(2, 3, 1), slice(0, 5, 1))), - (5300.0, 5.0, (slice(5, 6, 1), slice(3, 4, 1), slice(0, 5, 1))), - (6200.0, 5.0, (slice(6, 7, 1), slice(2, 3, 1), slice(0, 5, 1))), - (6300.0, 5.0, (slice(6, 7, 1), slice(3, 4, 1), slice(0, 5, 1))), - (7200.0, 5.0, (slice(7, 8, 1), slice(2, 3, 1), slice(0, 5, 1))), - (7300.0, 5.0, (slice(7, 8, 1), slice(3, 4, 1), slice(0, 5, 1))), - ], - ), - ( - (slice(5, 8, 1), slice(2, 4, 1), slice(0, 5, 1)), - np.asfortranarray(np.arange(2, 100_002).reshape((100, 10, 100))), - [ - (5200.0, 5.0, (slice(5, 6, 1), slice(2, 3, 1), slice(0, 5, 1))), - (5300.0, 5.0, (slice(5, 6, 1), slice(3, 4, 1), slice(0, 5, 1))), - (6200.0, 5.0, (slice(6, 7, 1), slice(2, 3, 1), slice(0, 5, 1))), - (6300.0, 5.0, (slice(6, 7, 1), slice(3, 4, 1), slice(0, 5, 1))), - (7200.0, 5.0, (slice(7, 8, 1), slice(2, 3, 1), slice(0, 5, 1))), - (7300.0, 5.0, (slice(7, 8, 1), slice(3, 4, 1), slice(0, 5, 1))), - ], - ), - ( - (slice(5, 8, 1), slice(2, 4, 1)), - np.arange(2, 100_002).reshape((100, 10, 100)), - [ - (5200, 200, (slice(5, 6, 1), slice(2, 4, 1))), - (6200, 200, (slice(6, 7, 1), slice(2, 4, 1))), - (7200, 200, (slice(7, 8, 1), slice(2, 4, 1))), - ], - ), - ( - (slice(0, 10, 1),), - np.arange(0, 10).reshape(10), - [(0, 10, (slice(0, 10, 1),))], - ), - ((0,), np.arange(0, 100).reshape((10, 10)), [(0, 10, (slice(0, 1, 1),))]), - ( - ( - 0, - 0, - ), - 
np.arange(0, 100).reshape((10, 10)), - [(0, 1, (slice(0, 1, 1), slice(0, 1, 1)))], - ), - ((0,), np.arange(0, 10).reshape(10), [(0, 1, (slice(0, 1, 1),))]), - pytest.param( - (slice(5, 8, 1), slice(2, 4, 1), slice(0, 5, 1)), - np.arange(2, 100002).reshape((10, 1, 10000)), - None, - marks=[pytest.mark.xfail(reason="slice 2 is out of range")], - ), - pytest.param( - (slice(5, 8, 1), slice(2, 4, 1), slice(0, 5, 1)), - np.arange(2, 100_002).reshape((10, 10_000)), - None, - marks=[pytest.mark.xfail(reason="slice 2 is out of range")], - ), - ], -) -def test_PartialChunkIterator(selection, arr, expected): - PCI = PartialChunkIterator(selection, arr.shape) - results = list(PCI) - assert results == expected - - -def test_slice_selection_uints(): - arr = np.arange(24).reshape((4, 6)) - idx = np.uint64(3) - slice_sel = make_slice_selection((idx,)) - assert arr[tuple(slice_sel)].shape == (1, 6) - - -def test_numpy_int_indexing(): - a = np.arange(1050) - z = zarr.create(shape=1050, chunks=100, dtype=a.dtype) - z[:] = a - assert a[42] == z[42] - assert a[numpy.int64(42)] == z[numpy.int64(42)] - - -@pytest.mark.parametrize( - "shape, chunks, ops", - [ - # 1D test cases - ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), - ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]), - ( - (1070,), - (50,), - [ - ("__getitem__", (slice(200, 400),)), - ("__setitem__", (slice(200, 400, 100),)), - ], - ), - # 2D test cases - ( - (40, 50), - (5, 8), - [ - ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), - ("__setitem__", (slice(None), (slice(None)))), - ], - ), - ], -) -def test_accessed_chunks(shape, chunks, ops): - # Test that only the required chunks are accessed during basic selection operations - # shape: array shape - # chunks: chunk size - # ops: list of tuples with (optype, tuple of slices) - # optype = "__getitem__" or "__setitem__", tuple length must match number of dims - import itertools - - # Use a counting dict as the backing store so we can track the items access - store = CountingDict() - z = zarr.create(shape=shape, chunks=chunks, store=store) - - for ii, (optype, slices) in enumerate(ops): - # Resolve the slices into the accessed chunks for each dimension - chunks_per_dim = [ - np.unique(np.arange(N, dtype=int)[sl] // C) for N, C, sl in zip(shape, chunks, slices) - ] - - # Combine and generate the cartesian product to determine the chunks keys that - # will be accessed - chunks_accessed = ( - ".".join([str(ci) for ci in comb]) for comb in itertools.product(*chunks_per_dim) - ) - counts_before = store.counter.copy() - - # Perform the operation - if optype == "__getitem__": - z[slices] - else: - z[slices] = ii - - # Get the change in counts - delta_counts = store.counter - counts_before - - # Check that the access counts for the operation have increased by one for all - # the chunks we expect to be included - for ci in chunks_accessed: - assert delta_counts.pop((optype, ci)) == 1 - - # If the chunk was partially written to it will also have been read once. 
We - # don't determine if the chunk was actually partial here, just that the - # counts are consistent that this might have happened - if optype == "__setitem__": - assert ("__getitem__", ci) not in delta_counts or delta_counts.pop( - ("__getitem__", ci) - ) == 1 - # Check that no other chunks were accessed - assert len(delta_counts) == 0 diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py deleted file mode 100644 index 96eae999f4..0000000000 --- a/zarr/tests/test_info.py +++ /dev/null @@ -1,66 +0,0 @@ -import numcodecs -import pytest - -import zarr -from zarr.util import InfoReporter - - -@pytest.mark.parametrize("array_size", [10, 15000]) -def test_info(array_size): - # setup - g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) - g.create_group("foo") - z = g.zeros("bar", shape=array_size, filters=[numcodecs.Adler32()]) - - # test group info - items = g.info_items() - keys = sorted([k for k, _ in items]) - expected_keys = sorted( - [ - "Type", - "Read-only", - "Synchronizer type", - "Store type", - "Chunk store type", - "No. members", - "No. arrays", - "No. groups", - "Arrays", - "Groups", - "Name", - ] - ) - assert expected_keys == keys - - # can also get a string representation of info via the info attribute - assert isinstance(g.info, InfoReporter) - assert "Type" in repr(g.info) - - # test array info - items = z.info_items() - keys = sorted([k for k, _ in items]) - expected_keys = sorted( - [ - "Type", - "Data type", - "Shape", - "Chunk shape", - "Order", - "Read-only", - "Filter [0]", - "Compressor", - "Synchronizer type", - "Store type", - "Chunk store type", - "No. bytes", - "No. bytes stored", - "Storage ratio", - "Chunks initialized", - "Name", - ] - ) - assert expected_keys == keys - - # can also get a string representation of info via the info attribute - assert isinstance(z.info, InfoReporter) - assert "Type" in repr(z.info) diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py deleted file mode 100644 index 57ab9a0781..0000000000 --- a/zarr/tests/test_meta.py +++ /dev/null @@ -1,640 +0,0 @@ -import base64 -import copy -import json - -import numpy as np -import pytest - -from zarr.codecs import Blosc, Delta, Pickle, Zlib, Zstd -from zarr.errors import MetadataError -from zarr.meta import ( - ZARR_FORMAT, - decode_array_metadata, - decode_dtype, - decode_group_metadata, - encode_array_metadata, - encode_dtype, - encode_fill_value, - decode_fill_value, - get_extended_dtype_info, - _v3_complex_types, - _v3_datetime_types, - _default_entry_point_metadata_v3, - Metadata3, -) -from zarr.util import normalize_dtype, normalize_fill_value - - -def assert_json_equal(expect, actual): - if isinstance(actual, bytes): - actual = str(actual, "ascii") - ej = json.loads(expect) - aj = json.loads(actual) - assert ej == aj - - -def test_encode_decode_array_1(): - meta = dict( - shape=(100,), - chunks=(10,), - dtype=np.dtype("U4", " CuPyCPUCompressor: - if compressor: - compressor = getattr(zarr.codecs, compressor)() - return CuPyCPUCompressor(compressor) - - -def init_store(tmp_path, store_type) -> Optional[Store]: - if store_type is DirectoryStore: - return store_type(str(tmp_path / "store")) - if store_type is MemoryStore: - return MemoryStore() - return None - - -def ensure_module(module): - if isinstance(module, str): - return pytest.importorskip(module) - return module - - -param_module_and_compressor = [ - (MyArray, None), - ("cupy", init_compressor(None)), - ("cupy", init_compressor("Zlib")), - ("cupy", init_compressor("Blosc")), 
-] - - -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -@pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) -def test_array(tmp_path, module, compressor, store_type): - xp = ensure_module(module) - - store = init_store(tmp_path / "from_cupy_array", store_type) - a = xp.arange(100) - z = array(a, chunks=10, compressor=compressor, store=store, meta_array=xp.empty(())) - assert a.shape == z.shape - assert a.dtype == z.dtype - assert isinstance(a, type(z[:])) - assert isinstance(z.meta_array, type(xp.empty(()))) - xp.testing.assert_array_equal(a, z[:]) - - # with array-like - store = init_store(tmp_path / "from_list", store_type) - a = list(range(100)) - z = array(a, chunks=10, compressor=compressor, store=store, meta_array=xp.empty(())) - assert (100,) == z.shape - assert np.asarray(a).dtype == z.dtype - xp.testing.assert_array_equal(a, z[:]) - - # with another zarr array - store = init_store(tmp_path / "from_another_store", store_type) - z2 = array(z, compressor=compressor, store=store, meta_array=xp.empty(())) - assert z.shape == z2.shape - assert z.chunks == z2.chunks - assert z.dtype == z2.dtype - xp.testing.assert_array_equal(z[:], z2[:]) - - store = init_store(tmp_path / "open_array", store_type) - a = xp.arange(100) - z = open_array( - store, - shape=a.shape, - dtype=a.dtype, - chunks=10, - compressor=compressor, - meta_array=xp.empty(()), - ) - z[:] = a - assert a.shape == z.shape - assert a.dtype == z.dtype - assert isinstance(a, type(z[:])) - assert isinstance(z.meta_array, type(xp.empty(()))) - xp.testing.assert_array_equal(a, z[:]) - - -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -def test_empty(module, compressor): - xp = ensure_module(module) - z = empty( - 100, - chunks=10, - compressor=compressor, - meta_array=xp.empty(()), - ) - assert (100,) == z.shape - assert (10,) == z.chunks - - -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -def test_zeros(module, compressor): - xp = ensure_module(module) - z = zeros( - 100, - chunks=10, - compressor=compressor, - meta_array=xp.empty(()), - ) - assert (100,) == z.shape - assert (10,) == z.chunks - xp.testing.assert_array_equal(np.zeros(100), z[:]) - - -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -def test_ones(module, compressor): - xp = ensure_module(module) - z = ones( - 100, - chunks=10, - compressor=compressor, - meta_array=xp.empty(()), - ) - assert (100,) == z.shape - assert (10,) == z.chunks - xp.testing.assert_array_equal(np.ones(100), z[:]) - - -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -def test_full(module, compressor): - xp = ensure_module(module) - z = full( - 100, - chunks=10, - fill_value=42, - dtype="i4", - compressor=compressor, - meta_array=xp.empty(()), - ) - assert (100,) == z.shape - assert (10,) == z.chunks - xp.testing.assert_array_equal(np.full(100, fill_value=42, dtype="i4"), z[:]) - - # nan - z = full( - 100, - chunks=10, - fill_value=np.nan, - dtype="f8", - compressor=compressor, - meta_array=xp.empty(()), - ) - assert np.all(np.isnan(z[:])) - - -@pytest.mark.parametrize("group_create_function", [group, open_group]) -@pytest.mark.parametrize("module, compressor", param_module_and_compressor) -@pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) -def test_group(tmp_path, group_create_function, module, compressor, store_type): - xp = ensure_module(module) - store = init_store(tmp_path, 
store_type) - g = group_create_function(store, meta_array=xp.empty(())) - g.ones("data", shape=(10, 11), dtype=int, compressor=compressor) - a = g["data"] - assert a.shape == (10, 11) - assert a.dtype == int - assert isinstance(a, Array) - assert isinstance(a[:], type(xp.empty(()))) - assert (a[:] == 1).all() - assert isinstance(g.meta_array, type(xp.empty(()))) diff --git a/zarr/tests/test_n5.py b/zarr/tests/test_n5.py deleted file mode 100644 index 2602aa06c1..0000000000 --- a/zarr/tests/test_n5.py +++ /dev/null @@ -1,53 +0,0 @@ -import pytest - -from zarr.n5 import N5ChunkWrapper, N5FSStore -from zarr.creation import create -from zarr.storage import atexit_rmtree -from numcodecs import GZip -import numpy as np -from typing import Tuple -import json -import atexit - -from zarr.tests.util import have_fsspec - - -def test_make_n5_chunk_wrapper(): - dtype = "uint8" - chunk_shape = (10,) - codec = GZip() - # ValueError when specifying both compressor and compressor_config - with pytest.raises(ValueError): - N5ChunkWrapper( - dtype, chunk_shape=chunk_shape, compressor_config=codec.get_config(), compressor=codec - ) - - wrapper_a = N5ChunkWrapper(dtype, chunk_shape=chunk_shape, compressor_config=codec.get_config()) - wrapper_b = N5ChunkWrapper(dtype, chunk_shape=chunk_shape, compressor=codec) - assert wrapper_a == wrapper_b - - -@pytest.mark.parametrize("chunk_shape", ((2,), (4, 4), (8, 8, 8))) -def test_partial_chunk_decode(chunk_shape: Tuple[int, ...]): - # Test that the N5Chunk wrapper can handle fractional chunks that - # may be generated by other N5 implementations - dtype = "uint8" - codec = GZip() - codec_wrapped = N5ChunkWrapper(dtype, chunk_shape=chunk_shape, compressor=codec) - subslices = tuple(slice(0, cs // 2) for cs in chunk_shape) - chunk = np.zeros(chunk_shape, dtype=dtype) - chunk[subslices] = 1 - subchunk = np.ascontiguousarray(chunk[subslices]) - assert np.array_equal(codec_wrapped.decode(codec_wrapped.encode(subchunk)), chunk) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -def test_dtype_decode(): - path = "data/array.n5" - atexit_rmtree(path) - atexit.register(atexit_rmtree, path) - n5_store = N5FSStore(path) - create(100, store=n5_store) - dtype_n5 = json.loads(n5_store[".zarray"])["dtype"] - dtype_zarr = json.loads(create(100).store[".zarray"])["dtype"] - assert dtype_n5 == dtype_zarr diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py deleted file mode 100644 index da690f5959..0000000000 --- a/zarr/tests/test_storage.py +++ /dev/null @@ -1,2632 +0,0 @@ -import array -import atexit -import json -import os -import pathlib -import sys -import pickle -import shutil -import tempfile -from contextlib import contextmanager -from pickle import PicklingError -from zipfile import ZipFile - -import numpy as np -import pytest -from numpy.testing import assert_array_almost_equal, assert_array_equal - -from numcodecs.compat import ensure_bytes - -import zarr -from zarr._storage.store import _get_hierarchy_metadata -from zarr.codecs import BZ2, AsType, Blosc, Zlib -from zarr.context import Context -from zarr.convenience import consolidate_metadata -from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError -from zarr.hierarchy import group -from zarr.meta import ZARR_FORMAT, decode_array_metadata -from zarr.n5 import N5Store, N5FSStore, N5_FORMAT, n5_attrs_key -from zarr.storage import ( - ABSStore, - ConsolidatedMetadataStore, - DBMStore, - DictStore, - DirectoryStore, - KVStore, - LMDBStore, - LRUStoreCache, - MemoryStore, 
- MongoDBStore, - NestedDirectoryStore, - RedisStore, - SQLiteStore, - Store, - TempStore, - ZipStore, - array_meta_key, - atexit_rmglob, - atexit_rmtree, - attrs_key, - data_root, - default_compressor, - getsize, - group_meta_key, - init_array, - init_group, - migrate_1to2, - meta_root, - normalize_store_arg, -) -from zarr.storage import FSStore, rename, listdir -from zarr._storage.v3 import KVStoreV3 -from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container, mktemp -from zarr.util import ConstantMap, json_dumps - - -@contextmanager -def does_not_raise(): - yield - - -@pytest.fixture( - params=[ - (None, "."), - (".", "."), - ("/", "/"), - ] -) -def dimension_separator_fixture(request): - return request.param - - -def skip_if_nested_chunks(**kwargs): - if kwargs.get("dimension_separator") == "/": - pytest.skip("nested chunks are unsupported") - - -def test_kvstore_repr(): - repr(KVStore(dict())) - - -def test_ensure_store(): - class InvalidStore: - pass - - with pytest.raises(ValueError): - Store._ensure_store(InvalidStore()) - - # cannot initialize with a store from a different Zarr version - with pytest.raises(ValueError): - Store._ensure_store(KVStoreV3(dict())) - - # cannot initialize without a store - with pytest.raises(ValueError): - Store._ensure_store(None) - - -def test_capabilities(): - s = KVStore(dict()) - assert s.is_readable() - assert s.is_listable() - assert s.is_erasable() - assert s.is_writeable() - - -def test_getsize_non_implemented(): - assert getsize(object()) == -1 - - -def test_kvstore_eq(): - assert KVStore(dict()) != dict() - - -def test_coverage_rename(): - store = dict() - store["a"] = 1 - rename(store, "a", "b") - - -def test_deprecated_listdir_nosotre(): - store = dict() - with pytest.warns(UserWarning, match="has no `listdir`"): - listdir(store) - - -class StoreTests: - """Abstract store tests.""" - - version = 2 - root = "" - - def create_store(self, **kwargs): # pragma: no cover - # implement in sub-class - raise NotImplementedError - - def test_context_manager(self): - with self.create_store(): - pass - - def test_get_set_del_contains(self): - store = self.create_store() - - # test __contains__, __getitem__, __setitem__ - key = self.root + "foo" - assert key not in store - with pytest.raises(KeyError): - # noinspection PyStatementEffect - store[key] - store[key] = b"bar" - assert key in store - assert b"bar" == ensure_bytes(store[key]) - - # test __delitem__ (optional) - try: - del store[key] - except NotImplementedError: - pass - else: - assert key not in store - with pytest.raises(KeyError): - # noinspection PyStatementEffect - store[key] - with pytest.raises(KeyError): - # noinspection PyStatementEffect - del store[key] - - store.close() - - def test_set_invalid_content(self): - store = self.create_store() - - with pytest.raises(TypeError): - store[self.root + "baz"] = list(range(5)) - - store.close() - - def test_clear(self): - store = self.create_store() - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"qux" - assert len(store) == 2 - store.clear() - assert len(store) == 0 - assert self.root + "foo" not in store - assert self.root + "baz" not in store - - store.close() - - def test_pop(self): - store = self.create_store() - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"qux" - assert len(store) == 2 - v = store.pop(self.root + "foo") - assert ensure_bytes(v) == b"bar" - assert len(store) == 1 - v = store.pop(self.root + "baz") - assert ensure_bytes(v) == b"qux" - assert len(store) == 0 
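The abstract StoreTests class above encodes the MutableMapping-style contract that every zarr v2 store implementation was expected to satisfy. A minimal sketch of that contract against KVStore, assuming zarr 2.x is installed (the key names are purely illustrative):

import zarr
from zarr.storage import KVStore

store = KVStore(dict())                      # simplest in-memory v2 store
store["foo"] = b"bar"                        # __setitem__ accepts bytes-like values
assert "foo" in store                        # __contains__
assert store.pop("foo") == b"bar"            # pop returns the stored value...
assert len(store) == 0                       # ...and removes the key
assert store.pop("missing", b"x") == b"x"    # a default suppresses KeyError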
- with pytest.raises(KeyError): - store.pop(self.root + "xxx") - v = store.pop(self.root + "xxx", b"default") - assert v == b"default" - v = store.pop(self.root + "xxx", b"") - assert v == b"" - v = store.pop(self.root + "xxx", None) - assert v is None - - store.close() - - def test_popitem(self): - store = self.create_store() - store[self.root + "foo"] = b"bar" - k, v = store.popitem() - assert k == self.root + "foo" - assert ensure_bytes(v) == b"bar" - assert len(store) == 0 - with pytest.raises(KeyError): - store.popitem() - - store.close() - - def test_writeable_values(self): - store = self.create_store() - - # __setitem__ should accept any value that implements buffer interface - store[self.root + "foo1"] = b"bar" - store[self.root + "foo2"] = bytearray(b"bar") - store[self.root + "foo3"] = array.array("B", b"bar") - store[self.root + "foo4"] = np.frombuffer(b"bar", dtype="u1") - - store.close() - - def test_update(self): - store = self.create_store() - assert self.root + "foo" not in store - assert self.root + "baz" not in store - - if self.version == 2: - store.update(foo=b"bar", baz=b"quux") - else: - kv = {self.root + "foo": b"bar", self.root + "baz": b"quux"} - store.update(kv) - - assert b"bar" == ensure_bytes(store[self.root + "foo"]) - assert b"quux" == ensure_bytes(store[self.root + "baz"]) - - store.close() - - def test_iterators(self): - store = self.create_store() - - # test iterator methods on empty store - assert 0 == len(store) - assert set() == set(store) - assert set() == set(store.keys()) - assert set() == set(store.values()) - assert set() == set(store.items()) - - # setup some values - store[self.root + "a"] = b"aaa" - store[self.root + "b"] = b"bbb" - store[self.root + "c/d"] = b"ddd" - store[self.root + "c/e/f"] = b"fff" - - # test iterators on store with data - assert 4 == len(store) - expected = set(self.root + k for k in ["a", "b", "c/d", "c/e/f"]) - assert expected == set(store) - assert expected == set(store.keys()) - assert {b"aaa", b"bbb", b"ddd", b"fff"} == set(map(ensure_bytes, store.values())) - assert { - (self.root + "a", b"aaa"), - (self.root + "b", b"bbb"), - (self.root + "c/d", b"ddd"), - (self.root + "c/e/f", b"fff"), - } == set(map(lambda kv: (kv[0], ensure_bytes(kv[1])), store.items())) - - store.close() - - def test_pickle(self): - # setup store - store = self.create_store() - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"quux" - n = len(store) - keys = sorted(store.keys()) - - # round-trip through pickle - dump = pickle.dumps(store) - # some stores cannot be opened twice at the same time, need to close - # store before can round-trip through pickle - store.close() - # check can still pickle after close - assert dump == pickle.dumps(store) - store2 = pickle.loads(dump) - - # verify - assert n == len(store2) - assert keys == sorted(store2.keys()) - assert b"bar" == ensure_bytes(store2[self.root + "foo"]) - assert b"quux" == ensure_bytes(store2[self.root + "baz"]) - - store2.close() - - def test_getsize(self): - store = self.create_store() - if isinstance(store, dict) or hasattr(store, "getsize"): - assert 0 == getsize(store) - store["foo"] = b"x" - assert 1 == getsize(store) - assert 1 == getsize(store, "foo") - store["bar"] = b"yy" - assert 3 == getsize(store) - assert 2 == getsize(store, "bar") - store["baz"] = bytearray(b"zzz") - assert 6 == getsize(store) - assert 3 == getsize(store, "baz") - store["quux"] = array.array("B", b"zzzz") - assert 10 == getsize(store) - assert 4 == getsize(store, "quux") - store["spong"] = 
np.frombuffer(b"zzzzz", dtype="u1") - assert 15 == getsize(store) - assert 5 == getsize(store, "spong") - - store.close() - - # noinspection PyStatementEffect - def test_hierarchy(self): - # setup - store = self.create_store() - store[self.root + "a"] = b"aaa" - store[self.root + "b"] = b"bbb" - store[self.root + "c/d"] = b"ddd" - store[self.root + "c/e/f"] = b"fff" - store[self.root + "c/e/g"] = b"ggg" - - # check keys - assert self.root + "a" in store - assert self.root + "b" in store - assert self.root + "c/d" in store - assert self.root + "c/e/f" in store - assert self.root + "c/e/g" in store - assert self.root + "c" not in store - assert self.root + "c/" not in store - assert self.root + "c/e" not in store - assert self.root + "c/e/" not in store - assert self.root + "c/d/x" not in store - - # check __getitem__ - with pytest.raises(KeyError): - store[self.root + "c"] - with pytest.raises(KeyError): - store[self.root + "c/e"] - with pytest.raises(KeyError): - store[self.root + "c/d/x"] - - # test getsize (optional) - if hasattr(store, "getsize"): - # TODO: proper behavior of getsize? - # v3 returns size of all nested arrays, not just the - # size of the arrays in the current folder. - if self.version == 2: - assert 6 == store.getsize() - else: - assert 15 == store.getsize() - assert 3 == store.getsize("a") - assert 3 == store.getsize("b") - if self.version == 2: - assert 3 == store.getsize("c") - else: - assert 9 == store.getsize("c") - assert 3 == store.getsize("c/d") - assert 6 == store.getsize("c/e") - assert 3 == store.getsize("c/e/f") - assert 3 == store.getsize("c/e/g") - # non-existent paths - assert 0 == store.getsize("x") - assert 0 == store.getsize("a/x") - assert 0 == store.getsize("c/x") - assert 0 == store.getsize("c/x/y") - assert 0 == store.getsize("c/d/y") - assert 0 == store.getsize("c/d/y/z") - - # access item via full path - assert 3 == store.getsize(self.root + "a") - - # test listdir (optional) - if hasattr(store, "listdir"): - assert {"a", "b", "c"} == set(store.listdir(self.root)) - assert {"d", "e"} == set(store.listdir(self.root + "c")) - assert {"f", "g"} == set(store.listdir(self.root + "c/e")) - # no exception raised if path does not exist or is leaf - assert [] == store.listdir(self.root + "x") - assert [] == store.listdir(self.root + "a/x") - assert [] == store.listdir(self.root + "c/x") - assert [] == store.listdir(self.root + "c/x/y") - assert [] == store.listdir(self.root + "c/d/y") - assert [] == store.listdir(self.root + "c/d/y/z") - assert [] == store.listdir(self.root + "c/e/f") - - # test rename (optional) - if store.is_erasable(): - store.rename("c/e", "c/e2") - assert self.root + "c/d" in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - assert self.root + "c/e2" not in store - assert self.root + "c/e2/f" in store - assert self.root + "c/e2/g" in store - store.rename("c/e2", "c/e") - assert self.root + "c/d" in store - assert self.root + "c/e2" not in store - assert self.root + "c/e2/f" not in store - assert self.root + "c/e2/g" not in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" in store - assert self.root + "c/e/g" in store - store.rename("c", "c1/c2/c3") - assert self.root + "a" in store - assert self.root + "c" not in store - assert self.root + "c/d" not in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - assert self.root + "c1" not in store - assert 
self.root + "c1/c2" not in store - assert self.root + "c1/c2/c3" not in store - assert self.root + "c1/c2/c3/d" in store - assert self.root + "c1/c2/c3/e" not in store - assert self.root + "c1/c2/c3/e/f" in store - assert self.root + "c1/c2/c3/e/g" in store - store.rename("c1/c2/c3", "c") - assert self.root + "c" not in store - assert self.root + "c/d" in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" in store - assert self.root + "c/e/g" in store - assert self.root + "c1" not in store - assert self.root + "c1/c2" not in store - assert self.root + "c1/c2/c3" not in store - assert self.root + "c1/c2/c3/d" not in store - assert self.root + "c1/c2/c3/e" not in store - assert self.root + "c1/c2/c3/e/f" not in store - assert self.root + "c1/c2/c3/e/g" not in store - - # test rmdir (optional) - store.rmdir("c/e") - assert self.root + "c/d" in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - store.rmdir("c") - assert self.root + "c/d" not in store - store.rmdir() - assert self.root + "a" not in store - assert self.root + "b" not in store - store[self.root + "a"] = b"aaa" - store[self.root + "c/d"] = b"ddd" - store[self.root + "c/e/f"] = b"fff" - # no exceptions raised if path does not exist or is leaf - store.rmdir("x") - store.rmdir("a/x") - store.rmdir("c/x") - store.rmdir("c/x/y") - store.rmdir("c/d/y") - store.rmdir("c/d/y/z") - store.rmdir("c/e/f") - assert self.root + "a" in store - assert self.root + "c/d" in store - assert self.root + "c/e/f" in store - - store.close() - - def test_init_array(self, dimension_separator_fixture): - pass_dim_sep, want_dim_sep = dimension_separator_fixture - - store = self.create_store(dimension_separator=pass_dim_sep) - init_array(store, shape=1000, chunks=100) - - # check metadata - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - assert default_compressor.get_config() == meta["compressor"] - assert meta["fill_value"] is None - # Missing MUST be assumed to be "." 
- assert meta.get("dimension_separator", ".") is want_dim_sep - - store.close() - - def test_init_array_overwrite(self): - self._test_init_array_overwrite("F") - - def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path("F") - - def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store("F") - - def test_init_group_overwrite(self): - self._test_init_group_overwrite("F") - - def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path("F") - - def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store("F") - - def _test_init_array_overwrite(self, order): - # setup - store = self.create_store() - if self.version == 2: - path = None - mkey = array_meta_key - meta = dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=Zlib(1).get_config(), - fill_value=0, - order=order, - filters=None, - ) - else: - path = "arr1" # no default, have to specify for v3 - mkey = meta_root + path + ".array.json" - meta = dict( - shape=(2000,), - chunk_grid=dict(type="regular", chunk_shape=(200,), separator=("/")), - data_type=np.dtype("u1"), - compressor=Zlib(1), - fill_value=0, - chunk_memory_layout=order, - filters=None, - ) - store[mkey] = store._metadata_class.encode_array_metadata(meta) - - # don't overwrite (default) - with pytest.raises(ContainsArrayError): - init_array(store, shape=1000, chunks=100, path=path) - - # do overwrite - try: - init_array(store, shape=1000, chunks=100, dtype="i4", overwrite=True, path=path) - except NotImplementedError: - pass - else: - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - assert (100,) == meta["chunks"] - assert np.dtype("i4") == meta["dtype"] - else: - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype("i4") == meta["data_type"] - assert (1000,) == meta["shape"] - - store.close() - - def test_init_array_path(self): - path = "foo/bar" - store = self.create_store() - init_array(store, shape=1000, chunks=100, path=path) - - # check metadata - if self.version == 2: - mkey = path + "/" + array_meta_key - else: - mkey = meta_root + path + ".array.json" - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - assert default_compressor.get_config() == meta["compressor"] - else: - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype(None) == meta["data_type"] - assert default_compressor == meta["compressor"] - assert (1000,) == meta["shape"] - assert meta["fill_value"] is None - - store.close() - - def _test_init_array_overwrite_path(self, order): - # setup - path = "foo/bar" - store = self.create_store() - if self.version == 2: - mkey = path + "/" + array_meta_key - meta = dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=Zlib(1).get_config(), - fill_value=0, - order=order, - filters=None, - ) - else: - mkey = meta_root + path + ".array.json" - meta = dict( - shape=(2000,), - chunk_grid=dict(type="regular", chunk_shape=(200,), separator=("/")), - data_type=np.dtype("u1"), - compressor=Zlib(1), - fill_value=0, - chunk_memory_layout=order, - filters=None, - ) - store[mkey] = store._metadata_class.encode_array_metadata(meta) - - # don't overwrite - with pytest.raises(ContainsArrayError): - 
init_array(store, shape=1000, chunks=100, path=path) - - # do overwrite - try: - init_array(store, shape=1000, chunks=100, dtype="i4", path=path, overwrite=True) - except NotImplementedError: - pass - else: - if self.version == 2: - assert group_meta_key in store - assert array_meta_key not in store - assert mkey in store - # should have been overwritten - meta = store._metadata_class.decode_array_metadata(store[mkey]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - assert (100,) == meta["chunks"] - assert np.dtype("i4") == meta["dtype"] - else: - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype("i4") == meta["data_type"] - assert (1000,) == meta["shape"] - - store.close() - - def test_init_array_overwrite_group(self): - # setup - path = "foo/bar" - store = self.create_store() - if self.version == 2: - array_key = path + "/" + array_meta_key - group_key = path + "/" + group_meta_key - else: - array_key = meta_root + path + ".array.json" - group_key = meta_root + path + ".group.json" - store[group_key] = store._metadata_class.encode_group_metadata() - - # don't overwrite - with pytest.raises(ContainsGroupError): - init_array(store, shape=1000, chunks=100, path=path) - - # do overwrite - try: - init_array(store, shape=1000, chunks=100, dtype="i4", path=path, overwrite=True) - except NotImplementedError: - pass - else: - assert group_key not in store - assert array_key in store - meta = store._metadata_class.decode_array_metadata(store[array_key]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - assert (100,) == meta["chunks"] - assert np.dtype("i4") == meta["dtype"] - else: - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype("i4") == meta["data_type"] - assert (1000,) == meta["shape"] - - store.close() - - def _test_init_array_overwrite_chunk_store(self, order): - # setup - store = self.create_store() - chunk_store = self.create_store() - - if self.version == 2: - path = None - data_path = "" - mkey = array_meta_key - meta = dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=None, - fill_value=0, - filters=None, - order=order, - ) - else: - path = "arr1" - data_path = data_root + "arr1/" - mkey = meta_root + path + ".array.json" - meta = dict( - shape=(2000,), - chunk_grid=dict(type="regular", chunk_shape=(200,), separator=("/")), - data_type=np.dtype("u1"), - compressor=None, - fill_value=0, - filters=None, - chunk_memory_layout=order, - ) - - store[mkey] = store._metadata_class.encode_array_metadata(meta) - - chunk_store[data_path + "0"] = b"aaa" - chunk_store[data_path + "1"] = b"bbb" - - # don't overwrite (default) - with pytest.raises(ContainsArrayError): - init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) - - # do overwrite - try: - init_array( - store, - path=path, - shape=1000, - chunks=100, - dtype="i4", - overwrite=True, - chunk_store=chunk_store, - ) - except NotImplementedError: - pass - else: - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - assert (100,) == meta["chunks"] - assert np.dtype("i4") == meta["dtype"] - else: - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype("i4") == meta["data_type"] - assert (1000,) == meta["shape"] - assert data_path + "0" not in chunk_store - assert data_path + "1" not in chunk_store - - store.close() - chunk_store.close() - - def test_init_array_compat(self): - store = 
self.create_store() - if self.version == 2: - path = None - mkey = array_meta_key - else: - path = "arr1" - mkey = meta_root + path + ".array.json" - init_array(store, path=path, shape=1000, chunks=100, compressor="none") - meta = store._metadata_class.decode_array_metadata(store[mkey]) - if self.version == 2: - assert meta["compressor"] is None - else: - assert "compressor" not in meta - store.close() - - def test_init_group(self): - store = self.create_store() - if self.version == 2: - path = None - mkey = group_meta_key - else: - path = "foo" - mkey = meta_root + path + ".group.json" - init_group(store, path=path) - - # check metadata - assert mkey in store - meta = store._metadata_class.decode_group_metadata(store[mkey]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - else: - assert meta == {"attributes": {}} - - store.close() - - def _test_init_group_overwrite(self, order): - if self.version == 3: - pytest.skip("In v3 array and group names cannot overlap") - # setup - store = self.create_store() - store[array_meta_key] = store._metadata_class.encode_array_metadata( - dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=None, - fill_value=0, - order=order, - filters=None, - ) - ) - - # don't overwrite array (default) - with pytest.raises(ContainsArrayError): - init_group(store) - - # do overwrite - try: - init_group(store, overwrite=True) - except NotImplementedError: - pass - else: - assert array_meta_key not in store - assert group_meta_key in store - meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - - # don't overwrite group - with pytest.raises(ValueError): - init_group(store) - - store.close() - - def _test_init_group_overwrite_path(self, order): - # setup - path = "foo/bar" - store = self.create_store() - if self.version == 2: - meta = dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=None, - fill_value=0, - order=order, - filters=None, - ) - array_key = path + "/" + array_meta_key - group_key = path + "/" + group_meta_key - else: - meta = dict( - shape=(2000,), - chunk_grid=dict(type="regular", chunk_shape=(200,), separator=("/")), - data_type=np.dtype("u1"), - compressor=None, - fill_value=0, - filters=None, - chunk_memory_layout=order, - ) - array_key = meta_root + path + ".array.json" - group_key = meta_root + path + ".group.json" - store[array_key] = store._metadata_class.encode_array_metadata(meta) - - # don't overwrite - with pytest.raises(ValueError): - init_group(store, path=path) - - # do overwrite - try: - init_group(store, overwrite=True, path=path) - except NotImplementedError: - pass - else: - if self.version == 2: - assert array_meta_key not in store - assert group_meta_key in store - assert array_key not in store - assert group_key in store - # should have been overwritten - meta = store._metadata_class.decode_group_metadata(store[group_key]) - if self.version == 2: - assert ZARR_FORMAT == meta["zarr_format"] - else: - assert meta == {"attributes": {}} - - store.close() - - def _test_init_group_overwrite_chunk_store(self, order): - if self.version == 3: - pytest.skip("In v3 array and group names cannot overlap") - # setup - store = self.create_store() - chunk_store = self.create_store() - store[array_meta_key] = store._metadata_class.encode_array_metadata( - dict( - shape=(2000,), - chunks=(200,), - dtype=np.dtype("u1"), - compressor=None, - fill_value=0, - filters=None, - order=order, - ) - ) - chunk_store["foo"] = b"bar" 
- chunk_store["baz"] = b"quux" - - # don't overwrite array (default) - with pytest.raises(ValueError): - init_group(store, chunk_store=chunk_store) - - # do overwrite - try: - init_group(store, overwrite=True, chunk_store=chunk_store) - except NotImplementedError: - pass - else: - assert array_meta_key not in store - assert group_meta_key in store - meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert "foo" not in chunk_store - assert "baz" not in chunk_store - - # don't overwrite group - with pytest.raises(ValueError): - init_group(store) - - store.close() - chunk_store.close() - - -class TestMappingStore(StoreTests): - def create_store(self, **kwargs): - skip_if_nested_chunks(**kwargs) - return KVStore(dict()) - - def test_set_invalid_content(self): - # Generic mappings support non-buffer types - pass - - -def setdel_hierarchy_checks(store, root=""): - # these tests are for stores that are aware of hierarchy levels; this - # behaviour is not strictly required by Zarr but these tests are included - # to define behaviour of MemoryStore and DirectoryStore classes - - # check __setitem__ and __delitem__ blocked by leaf - - store[root + "a/b"] = b"aaa" - with pytest.raises(KeyError): - store[root + "a/b/c"] = b"xxx" - with pytest.raises(KeyError): - del store[root + "a/b/c"] - - store[root + "d"] = b"ddd" - with pytest.raises(KeyError): - store[root + "d/e/f"] = b"xxx" - with pytest.raises(KeyError): - del store[root + "d/e/f"] - - # test __setitem__ overwrite level - store[root + "x/y/z"] = b"xxx" - store[root + "x/y"] = b"yyy" - assert b"yyy" == ensure_bytes(store[root + "x/y"]) - assert root + "x/y/z" not in store - store[root + "x"] = b"zzz" - assert b"zzz" == ensure_bytes(store[root + "x"]) - assert root + "x/y" not in store - - # test __delitem__ overwrite level - store[root + "r/s/t"] = b"xxx" - del store[root + "r/s"] - assert root + "r/s/t" not in store - store[root + "r/s"] = b"xxx" - del store[root + "r"] - assert root + "r/s" not in store - - -class TestMemoryStore(StoreTests): - def create_store(self, **kwargs): - skip_if_nested_chunks(**kwargs) - return MemoryStore(**kwargs) - - def test_store_contains_bytes(self): - store = self.create_store() - store[self.root + "foo"] = np.array([97, 98, 99, 100, 101], dtype=np.uint8) - assert store[self.root + "foo"] == b"abcde" - - def test_setdel(self): - store = self.create_store() - setdel_hierarchy_checks(store, self.root) - - -class TestDictStore(StoreTests): - def create_store(self, **kwargs): - skip_if_nested_chunks(**kwargs) - - with pytest.warns(DeprecationWarning): - return DictStore(**kwargs) - - def test_deprecated(self): - store = self.create_store() - assert isinstance(store, MemoryStore) - - def test_pickle(self): - with pytest.warns(DeprecationWarning): - # pickle.load() will also trigger deprecation warning - super().test_pickle() - - -class TestDirectoryStore(StoreTests): - def create_store(self, normalize_keys=False, dimension_separator=".", **kwargs): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = DirectoryStore( - path, normalize_keys=normalize_keys, dimension_separator=dimension_separator, **kwargs - ) - return store - - def test_filesystem_path(self): - # test behaviour with path that does not exist - path = "data/store" - if os.path.exists(path): - shutil.rmtree(path) - store = DirectoryStore(path) - # should only be created on demand - assert not os.path.exists(path) - store["foo"] = b"bar" - assert 
os.path.isdir(path) - - # check correct permissions - # regression test for https://github.com/zarr-developers/zarr-python/issues/325 - stat = os.stat(path) - mode = stat.st_mode & 0o666 - umask = os.umask(0) - os.umask(umask) - assert mode == (0o666 & ~umask) - - # test behaviour with file path - with tempfile.NamedTemporaryFile() as f: - with pytest.raises(ValueError): - DirectoryStore(f.name) - - def test_init_pathlib(self): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - DirectoryStore(pathlib.Path(path)) - - def test_pickle_ext(self): - store = self.create_store() - store2 = pickle.loads(pickle.dumps(store)) - - # check path is preserved - assert store.path == store2.path - - # check point to same underlying directory - assert self.root + "xxx" not in store - store2[self.root + "xxx"] = b"yyy" - assert b"yyy" == ensure_bytes(store[self.root + "xxx"]) - - def test_setdel(self): - store = self.create_store() - setdel_hierarchy_checks(store, self.root) - - def test_normalize_keys(self): - store = self.create_store(normalize_keys=True) - store[self.root + "FOO"] = b"bar" - assert self.root + "FOO" in store - assert self.root + "foo" in store - - def test_listing_keys_slash(self): - def mock_walker_slash(_path): - yield from [ - # trailing slash in first key - ("root_with_slash/", ["d1", "g1"], [".zgroup"]), - ("root_with_slash/d1", [], [".zarray"]), - ("root_with_slash/g1", [], [".zgroup"]), - ] - - res = set(DirectoryStore._keys_fast("root_with_slash/", walker=mock_walker_slash)) - assert res == {".zgroup", "g1/.zgroup", "d1/.zarray"} - - def test_listing_keys_no_slash(self): - def mock_walker_no_slash(_path): - yield from [ - # no trailing slash in first key - ("root_with_no_slash", ["d1", "g1"], [".zgroup"]), - ("root_with_no_slash/d1", [], [".zarray"]), - ("root_with_no_slash/g1", [], [".zgroup"]), - ] - - res = set(DirectoryStore._keys_fast("root_with_no_slash", mock_walker_no_slash)) - assert res == {".zgroup", "g1/.zgroup", "d1/.zarray"} - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStore(StoreTests): - @pytest.fixture - def memory_store(self): - store = FSStore("memory://") - yield store - store.fs.store.clear() - - def create_store(self, normalize_keys=False, dimension_separator=".", path=None, **kwargs): - if path is None: - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - - store = FSStore( - path, normalize_keys=normalize_keys, dimension_separator=dimension_separator, **kwargs - ) - return store - - def test_init_array(self): - store = self.create_store() - init_array(store, shape=1000, chunks=100) - - # check metadata - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - assert meta["dimension_separator"] == "." 
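The deleted test_init_array cases above (for DirectoryStore and FSStore, and further down for the N5 stores) all revolve around the same v2 behaviour: zarr.storage.init_array writes a JSON ".zarray" document into the store, and a missing dimension_separator is read back as ".". A minimal sketch of that behaviour, assuming zarr 2.x and the simplest in-memory store:

import json
from zarr.storage import KVStore, array_meta_key, init_array

store = KVStore(dict())
init_array(store, shape=1000, chunks=100)

meta = json.loads(store[array_meta_key])             # array_meta_key == ".zarray" in the v2 layout
assert meta["zarr_format"] == 2
assert meta["shape"] == [1000]                       # JSON turns the shape tuple into a list
assert meta["chunks"] == [100]
assert meta.get("dimension_separator", ".") == "."   # an absent separator is treated as "."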
- - def test_dimension_separator(self): - for x in (".", "/"): - store = self.create_store(dimension_separator=x) - norm = store._normalize_key - assert ".zarray" == norm(".zarray") - assert ".zarray" == norm("/.zarray") - assert ".zgroup" == norm("/.zgroup") - assert "group/.zarray" == norm("group/.zarray") - assert "group/.zgroup" == norm("group/.zgroup") - assert "group/.zarray" == norm("/group/.zarray") - assert "group/.zgroup" == norm("/group/.zgroup") - - def test_complex(self): - path1 = tempfile.mkdtemp() - path2 = tempfile.mkdtemp() - store = self.create_store( - path="simplecache::file://" + path1, - simplecache={"same_names": True, "cache_storage": path2}, - ) - assert not store - assert not os.listdir(path1) - assert not os.listdir(path2) - store[self.root + "foo"] = b"hello" - assert "foo" in os.listdir(str(path1) + "/" + self.root) - assert self.root + "foo" in store - assert not os.listdir(str(path2)) - assert store[self.root + "foo"] == b"hello" - assert "foo" in os.listdir(str(path2)) - - def test_deep_ndim(self): - import zarr - - store = self.create_store() - path = None if self.version == 2 else "group1" - foo = zarr.open_group(store=store, path=path) - bar = foo.create_group("bar") - baz = bar.create_dataset("baz", shape=(4, 4, 4), chunks=(2, 2, 2), dtype="i8") - baz[:] = 1 - if self.version == 2: - assert set(store.listdir()) == {".zgroup", "bar"} - else: - assert set(store.listdir()) == {"data", "meta", "zarr.json"} - assert set(store.listdir("meta/root/" + path)) == {"bar", "bar.group.json"} - assert set(store.listdir("data/root/" + path)) == {"bar"} - assert foo["bar"]["baz"][(0, 0, 0)] == 1 - - def test_not_fsspec(self): - import zarr - - path = tempfile.mkdtemp() - with pytest.raises(ValueError, match="storage_options"): - zarr.open_array(path, mode="w", storage_options={"some": "kwargs"}) - with pytest.raises(ValueError, match="storage_options"): - zarr.open_group(path, mode="w", storage_options={"some": "kwargs"}) - zarr.open_array("file://" + path, mode="w", shape=(1,), dtype="f8") - - def test_create(self): - import zarr - - path1 = tempfile.mkdtemp() - path2 = tempfile.mkdtemp() - g = zarr.open_group("file://" + path1, mode="w", storage_options={"auto_mkdir": True}) - a = g.create_dataset("data", shape=(8,)) - a[:4] = [0, 1, 2, 3] - assert "data" in os.listdir(path1) - assert ".zgroup" in os.listdir(path1) - - # consolidated metadata (GH#915) - consolidate_metadata("file://" + path1) - assert ".zmetadata" in os.listdir(path1) - - g = zarr.open_group( - "simplecache::file://" + path1, - mode="r", - storage_options={"cache_storage": path2, "same_names": True}, - ) - assert g.data[:].tolist() == [0, 1, 2, 3, 0, 0, 0, 0] - with pytest.raises(PermissionError): - g.data[:] = 1 - - @pytest.mark.parametrize("mode,allowed", [("r", False), ("r+", True)]) - def test_modify_consolidated(self, mode, allowed): - import zarr - - url = "file://" + tempfile.mkdtemp() - - # create - root = zarr.open_group(url, mode="w") - root.zeros("baz", shape=(10000, 10000), chunks=(1000, 1000), dtype="i4") - zarr.consolidate_metadata(url) - - # reopen and modify - root = zarr.open_consolidated(url, mode=mode) - if allowed: - root["baz"][0, 0] = 7 - - root = zarr.open_consolidated(url, mode="r") - assert root["baz"][0, 0] == 7 - else: - with pytest.raises(zarr.errors.ReadOnlyError): - root["baz"][0, 0] = 7 - - @pytest.mark.parametrize("mode", ["r", "r+"]) - def test_modify_consolidated_metadata_raises(self, mode): - import zarr - - url = "file://" + tempfile.mkdtemp() - - # create - root = 
zarr.open_group(url, mode="w") - root.zeros("baz", shape=(10000, 10000), chunks=(1000, 1000), dtype="i4") - zarr.consolidate_metadata(url) - - # reopen and modify - root = zarr.open_consolidated(url, mode=mode) - with pytest.raises(zarr.errors.ReadOnlyError): - root["baz"].resize(100, 100) - - def test_read_only(self): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = self.create_store(path=path) - store[self.root + "foo"] = b"bar" - - store = self.create_store(path=path, mode="r") - - with pytest.raises(PermissionError): - store[self.root + "foo"] = b"hex" - - with pytest.raises(PermissionError): - del store[self.root + "foo"] - - with pytest.raises(PermissionError): - store.delitems([self.root + "foo"]) - - with pytest.raises(PermissionError): - store.setitems({self.root + "foo": b"baz"}) - - with pytest.raises(PermissionError): - store.clear() - - with pytest.raises(PermissionError): - store.rmdir(self.root + "anydir") - - assert store[self.root + "foo"] == b"bar" - - def test_eq(self): - store1 = self.create_store(path="anypath") - store2 = self.create_store(path="anypath") - assert store1 == store2 - - @pytest.mark.usefixtures("s3") - def test_s3(self): - import zarr - - g = zarr.open_group("s3://test/out.zarr", mode="w", storage_options=self.s3so) - a = g.create_dataset("data", shape=(8,)) - a[:4] = [0, 1, 2, 3] - - g = zarr.open_group("s3://test/out.zarr", mode="r", storage_options=self.s3so) - - assert g.data[:].tolist() == [0, 1, 2, 3, 0, 0, 0, 0] - - # test via convenience - g = zarr.open("s3://test/out.zarr", mode="r", storage_options=self.s3so) - assert g.data[:].tolist() == [0, 1, 2, 3, 0, 0, 0, 0] - - @pytest.mark.usefixtures("s3") - def test_s3_complex(self): - import zarr - - g = zarr.open_group("s3://test/out.zarr", mode="w", storage_options=self.s3so) - expected = np.empty((8, 8, 8), dtype="int64") - expected[:] = -1 - a = g.create_dataset( - "data", shape=(8, 8, 8), fill_value=-1, chunks=(1, 1, 1), overwrite=True - ) - expected[0] = 0 - expected[3] = 3 - expected[6, 6, 6] = 6 - a[6, 6, 6] = 6 - a[:4] = expected[:4] - - b = g.create_dataset( - "data_f", - shape=(8,), - chunks=(1,), - dtype=[("foo", "S3"), ("bar", "i4")], - fill_value=(b"b", 1), - ) - b[:4] = (b"aaa", 2) - g2 = zarr.open_group("s3://test/out.zarr", mode="r", storage_options=self.s3so) - - assert (g2.data[:] == expected).all() - a.chunk_store.fs.invalidate_cache("test/out.zarr/data") - a[:] = 5 - assert (a[:] == 5).all() - - assert g2.data_f["foo"].tolist() == [b"aaa"] * 4 + [b"b"] * 4 - with pytest.raises(PermissionError): - g2.data[:] = 5 - - with pytest.raises(PermissionError): - g2.store.setitems({}) - - with pytest.raises(PermissionError): - # even though overwrite=True, store is read-only, so fails - g2.create_dataset( - "data", shape=(8, 8, 8), fill_value=-1, chunks=(1, 1, 1), overwrite=True - ) - - a = g.create_dataset( - "data", shape=(8, 8, 8), fill_value=-1, chunks=(1, 1, 1), overwrite=True - ) - assert (a[:] == -np.ones((8, 8, 8))).all() - - def test_exceptions(self, memory_store): - fs = memory_store.fs - group = zarr.open(memory_store, mode="w") - x = group.create_dataset("x", data=[1, 2, 3]) - y = group.create_dataset("y", data=1) - fs.store["/x/0"] = None - fs.store["/y/0"] = None - # no exception from FSStore.getitems getting KeyError - assert group.store.getitems(["foo"], contexts={}) == {} - # exception from FSStore.getitems getting AttributeError - with pytest.raises(Exception): # noqa: B017 - group.store.getitems(["x/0"], contexts={}) - # exception from 
FSStore.getitems getting AttributeError - with pytest.raises(Exception): # noqa: B017 - x[...] - # exception from FSStore.__getitem__ getting AttributeError - with pytest.raises(Exception): # noqa: B017 - y[...] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStoreWithKeySeparator(StoreTests): - def create_store(self, normalize_keys=False, key_separator=".", **kwargs): - # Since the user is passing key_separator, that will take priority. - skip_if_nested_chunks(**kwargs) - - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - return FSStore(path, normalize_keys=normalize_keys, key_separator=key_separator) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStoreFromFilesystem(StoreTests): - def create_store(self, normalize_keys=False, dimension_separator=".", path=None, **kwargs): - import fsspec - - fs = fsspec.filesystem("file") - - if path is None: - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - - with pytest.raises(ValueError): - # can't specify storage_options when passing an - # existing fs object - _ = FSStore(path, fs=fs, auto_mkdir=True) - - store = FSStore( - path, - normalize_keys=normalize_keys, - dimension_separator=dimension_separator, - fs=fs, - **kwargs, - ) - - return store - - -@pytest.fixture() -def s3(request): - # writable local S3 system - import shlex - import subprocess - import time - - if "BOTO_CONFIG" not in os.environ: # pragma: no cover - os.environ["BOTO_CONFIG"] = "/dev/null" - if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover - os.environ["AWS_ACCESS_KEY_ID"] = "foo" - if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover - os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" - requests = pytest.importorskip("requests") - s3fs = pytest.importorskip("s3fs") - pytest.importorskip("moto") - - port = 5555 - endpoint_uri = f"http://127.0.0.1:{port}/" - proc = subprocess.Popen( - shlex.split(f"moto_server -p {port}"), - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ) - - timeout = 5 - while timeout > 0: - try: - r = requests.get(endpoint_uri) - if r.ok: - break - except Exception: # pragma: no cover - pass - timeout -= 0.1 # pragma: no cover - time.sleep(0.1) # pragma: no cover - s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) - s3 = s3fs.S3FileSystem(anon=False, **s3so) - s3.mkdir("test") - request.cls.s3so = s3so - yield - proc.terminate() - proc.wait() - - -class TestNestedDirectoryStore(TestDirectoryStore): - def create_store(self, normalize_keys=False, **kwargs): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore(path, normalize_keys=normalize_keys, **kwargs) - return store - - def test_init_array(self): - store = self.create_store() - assert store._dimension_separator == "/" - init_array(store, shape=1000, chunks=100) - - # check metadata - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - assert meta["dimension_separator"] == "/" - - def test_chunk_nesting(self): - store = self.create_store() - # any path where last segment looks like a chunk key gets special handling - store[self.root + "0.0"] = b"xxx" - assert b"xxx" == store[self.root + "0.0"] - # assert b'xxx' == store['0/0'] - store[self.root + "foo/10.20.30"] = b"yyy" - assert b"yyy" == 
store[self.root + "foo/10.20.30"] - # assert b'yyy' == store['foo/10/20/30'] - store[self.root + "42"] = b"zzz" - assert b"zzz" == store[self.root + "42"] - - def test_listdir(self): - store = self.create_store() - z = zarr.zeros((10, 10), chunks=(5, 5), store=store) - z[:] = 1 # write to all chunks - for k in store.listdir(): - assert store.get(k) is not None - - -class TestNestedDirectoryStoreNone: - def test_value_error(self): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore(path, normalize_keys=True, dimension_separator=None) - assert store._dimension_separator == "/" - - -class TestNestedDirectoryStoreWithWrongValue: - def test_value_error(self): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - with pytest.raises(ValueError): - NestedDirectoryStore(path, normalize_keys=True, dimension_separator=".") - - -class TestN5Store(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = N5Store(path, normalize_keys=normalize_keys) - return store - - def test_equal(self): - store_a = self.create_store() - store_b = N5Store(store_a.path) - assert store_a == store_b - - @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) - def test_del_zarr_meta_key(self, zarr_meta_key): - store = self.create_store() - store[n5_attrs_key] = json_dumps({"foo": "bar"}) - del store[zarr_meta_key] - assert n5_attrs_key not in store - - def test_chunk_nesting(self): - store = self.create_store() - store["0.0"] = b"xxx" - assert "0.0" in store - assert b"xxx" == store["0.0"] - # assert b'xxx' == store['0/0'] - store["foo/10.20.30"] = b"yyy" - assert "foo/10.20.30" in store - assert b"yyy" == store["foo/10.20.30"] - # N5 reverses axis order - assert b"yyy" == store["foo/30/20/10"] - del store["foo/10.20.30"] - assert "foo/30/20/10" not in store - store["42"] = b"zzz" - assert "42" in store - assert b"zzz" == store["42"] - - def test_init_array(self): - store = self.create_store() - init_array(store, shape=1000, chunks=100) - - # check metadata - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert default_compressor.get_config() == compressor_config - # N5Store always has a fill value of 0 - assert meta["fill_value"] == 0 - assert meta["dimension_separator"] == "." 
- # Top-level groups AND arrays should have - # the n5 keyword in metadata - raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get("n5", None) == N5_FORMAT - - def test_init_array_path(self): - path = "foo/bar" - store = self.create_store() - init_array(store, shape=1000, chunks=100, path=path) - - # check metadata - key = path + "/" + array_meta_key - assert key in store - meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert default_compressor.get_config() == compressor_config - # N5Store always has a fill value of 0 - assert meta["fill_value"] == 0 - - def test_init_array_compat(self): - store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor="none") - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert compressor_config is None - - def test_init_array_overwrite(self): - self._test_init_array_overwrite("C") - - def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path("C") - - def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store("C") - - def test_init_group_overwrite(self): - self._test_init_group_overwrite("C") - - def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path("C") - - def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store("C") - - def test_init_group(self): - store = self.create_store() - init_group(store) - store[".zattrs"] = json_dumps({"foo": "bar"}) - # check metadata - assert group_meta_key in store - assert group_meta_key in store.listdir() - assert group_meta_key in store.listdir("") - meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - - def test_filters(self): - all_filters, all_errors = zip( - *[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType("f4", "f8")], pytest.raises(ValueError)), - ] - ) - for filters, error in zip(all_filters, all_errors): - store = self.create_store() - with error: - init_array(store, shape=1000, chunks=100, filters=filters) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestN5FSStore(TestFSStore): - def create_store(self, normalize_keys=False, path=None, **kwargs): - if path is None: - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - - store = N5FSStore(path, normalize_keys=normalize_keys, **kwargs) - return store - - def test_equal(self): - store_a = self.create_store() - store_b = N5FSStore(store_a.path) - assert store_a == store_b - - # This is copied wholesale from the N5Store tests. The same test could - # be run by making TestN5FSStore inherit from both TestFSStore and - # TestN5Store, but a direct copy is arguably more explicit. 
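One behaviour these duplicated N5 tests keep asserting is the chunk-key translation: a flat Zarr v2 chunk key such as "foo/10.20.30" is stored by N5Store and N5FSStore under a nested path with the axis order reversed, i.e. "foo/30/20/10". A small sketch of that mapping with N5Store, assuming zarr 2.x with its built-in N5 support (the temporary directory is illustrative):

import tempfile
from zarr.n5 import N5Store

store = N5Store(tempfile.mkdtemp())        # N5 data lives in a directory tree on disk
store["foo/10.20.30"] = b"yyy"             # Zarr-style chunk key, dimensions joined with "."
assert store["foo/30/20/10"] == b"yyy"     # same chunk under the nested N5 path, axes reversed
del store["foo/10.20.30"]                  # deleting via either form removes the chunk
assert "foo/30/20/10" not in store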
- - @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) - def test_del_zarr_meta_key(self, zarr_meta_key): - store = self.create_store() - store[n5_attrs_key] = json_dumps({"foo": "bar"}) - del store[zarr_meta_key] - assert n5_attrs_key not in store - - def test_chunk_nesting(self): - store = self.create_store() - store["0.0"] = b"xxx" - assert "0.0" in store - assert b"xxx" == store["0.0"] - # assert b'xxx' == store['0/0'] - store["foo/10.20.30"] = b"yyy" - assert "foo/10.20.30" in store - assert b"yyy" == store["foo/10.20.30"] - # N5 reverses axis order - assert b"yyy" == store["foo/30/20/10"] - del store["foo/10.20.30"] - assert "foo/30/20/10" not in store - store["42"] = b"zzz" - assert "42" in store - assert b"zzz" == store["42"] - - def test_init_array(self): - store = self.create_store() - init_array(store, shape=1000, chunks=100) - - # check metadata - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert default_compressor.get_config() == compressor_config - # N5Store always has a fill value of 0 - assert meta["fill_value"] == 0 - assert meta["dimension_separator"] == "." - # Top-level groups AND arrays should have - # the n5 keyword in metadata - raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get("n5", None) == N5_FORMAT - - def test_init_array_path(self): - path = "foo/bar" - store = self.create_store() - init_array(store, shape=1000, chunks=100, path=path) - - # check metadata - key = path + "/" + array_meta_key - assert key in store - meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta["zarr_format"] - assert (1000,) == meta["shape"] - assert (100,) == meta["chunks"] - assert np.dtype(None) == meta["dtype"] - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert default_compressor.get_config() == compressor_config - # N5Store always has a fill value of 0 - assert meta["fill_value"] == 0 - - def test_init_array_compat(self): - store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor="none") - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - # N5Store wraps the actual compressor - compressor_config = meta["compressor"]["compressor_config"] - assert compressor_config is None - - def test_init_array_overwrite(self): - self._test_init_array_overwrite("C") - - def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path("C") - - def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store("C") - - def test_init_group_overwrite(self): - self._test_init_group_overwrite("C") - - def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path("C") - - def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store("C") - - def test_dimension_separator(self): - with pytest.warns(UserWarning, match="dimension_separator"): - self.create_store(dimension_separator="/") - - def test_init_group(self): - store = self.create_store() - init_group(store) - store[".zattrs"] = json_dumps({"foo": "bar"}) - # check metadata - assert group_meta_key in store - assert group_meta_key in store.listdir() - 
assert group_meta_key in store.listdir("") - meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta["zarr_format"] - - def test_filters(self): - all_filters, all_errors = zip( - *[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType("f4", "f8")], pytest.raises(ValueError)), - ] - ) - for filters, error in zip(all_filters, all_errors): - store = self.create_store() - with error: - init_array(store, shape=1000, chunks=100, filters=filters) - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestNestedFSStore(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False, path=None, **kwargs): - if path is None: - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = FSStore( - path, normalize_keys=normalize_keys, dimension_separator="/", auto_mkdir=True, **kwargs - ) - return store - - def test_numbered_groups(self): - import zarr - - # Create an array - store = self.create_store() - group = zarr.group(store=store) - arr = group.create_dataset("0", shape=(10, 10)) - arr[1] = 1 - - # Read it back - store = self.create_store(path=store.path) - zarr.open_group(store.path)["0"] - - -class TestTempStore(StoreTests): - def create_store(self, **kwargs): - skip_if_nested_chunks(**kwargs) - return TempStore(**kwargs) - - def test_setdel(self): - store = self.create_store() - setdel_hierarchy_checks(store, self.root) - - -class TestZipStore(StoreTests): - ZipStoreClass = ZipStore - - def create_store(self, **kwargs): - path = mktemp(suffix=".zip") - atexit.register(os.remove, path) - store = ZipStore(path, mode="w", **kwargs) - return store - - def test_mode(self): - with self.ZipStoreClass("data/store.zip", mode="w") as store: - store[self.root + "foo"] = b"bar" - store = self.ZipStoreClass("data/store.zip", mode="r") - with pytest.raises(PermissionError): - store[self.root + "foo"] = b"bar" - with pytest.raises(PermissionError): - store.clear() - - def test_flush(self): - store = self.ZipStoreClass("data/store.zip", mode="w") - store[self.root + "foo"] = b"bar" - store.flush() - assert store[self.root + "foo"] == b"bar" - store.close() - - store = self.ZipStoreClass("data/store.zip", mode="r") - store.flush() # no-op - - def test_context_manager(self): - with self.create_store() as store: - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"qux" - assert 2 == len(store) - - def test_pop(self): - # override because not implemented - store = self.create_store() - store[self.root + "foo"] = b"bar" - with pytest.raises(NotImplementedError): - store.pop(self.root + "foo") - - def test_popitem(self): - # override because not implemented - store = self.create_store() - store[self.root + "foo"] = b"bar" - with pytest.raises(NotImplementedError): - store.popitem() - - def test_permissions(self): - store = self.ZipStoreClass("data/store.zip", mode="w") - foo_key = "foo" if self.version == 2 else self.root + "foo" - # TODO: cannot provide key ending in / for v3 - # how to create an empty folder in that case? 
- baz_key = "baz/" if self.version == 2 else self.root + "baz" - store[foo_key] = b"bar" - store[baz_key] = b"" - - store.flush() - store.close() - z = ZipFile("data/store.zip", "r") - info = z.getinfo(foo_key) - perm = oct(info.external_attr >> 16) - assert perm == "0o644" - info = z.getinfo(baz_key) - perm = oct(info.external_attr >> 16) - # only for posix platforms - if os.name == "posix": - if self.version == 2: - assert perm == "0o40775" - else: - # baz/ on v2, but baz on v3, so not a directory - assert perm == "0o644" - z.close() - - def test_store_and_retrieve_ndarray(self): - store = ZipStore("data/store.zip") - x = np.array([[1, 2], [3, 4]]) - store["foo"] = x - y = np.frombuffer(store["foo"], dtype=x.dtype).reshape(x.shape) - assert np.array_equiv(y, x) - - -class TestDBMStore(StoreTests): - def create_store(self, dimension_separator=None): - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - # create store using default dbm implementation - store = DBMStore(path, flag="n", dimension_separator=dimension_separator) - return store - - def test_context_manager(self): - with self.create_store() as store: - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"qux" - assert 2 == len(store) - - -class TestDBMStoreDumb(TestDBMStore): - def create_store(self, **kwargs): - path = mktemp(suffix=".dumbdbm") - atexit.register(atexit_rmglob, path + "*") - - import dbm.dumb as dumbdbm - - store = DBMStore(path, flag="n", open=dumbdbm.open, **kwargs) - return store - - -class TestDBMStoreGnu(TestDBMStore): - def create_store(self, **kwargs): - gdbm = pytest.importorskip("dbm.gnu") - path = mktemp(suffix=".gdbm") # pragma: no cover - atexit.register(os.remove, path) # pragma: no cover - store = DBMStore( - path, flag="n", open=gdbm.open, write_lock=False, **kwargs - ) # pragma: no cover - return store # pragma: no cover - - -class TestDBMStoreNDBM(TestDBMStore): - def create_store(self, **kwargs): - ndbm = pytest.importorskip("dbm.ndbm") - path = mktemp(suffix=".ndbm") # pragma: no cover - atexit.register(atexit_rmglob, path + "*") # pragma: no cover - store = DBMStore(path, flag="n", open=ndbm.open, **kwargs) # pragma: no cover - return store # pragma: no cover - - -class TestDBMStoreBerkeleyDB(TestDBMStore): - def create_store(self, **kwargs): - bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStore(path, flag="n", open=bsddb3.btopen, write_lock=False, **kwargs) - return store - - -class TestLMDBStore(StoreTests): - def create_store(self, **kwargs): - pytest.importorskip("lmdb") - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - buffers = True - store = LMDBStore(path, buffers=buffers, **kwargs) - return store - - def test_context_manager(self): - with self.create_store() as store: - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"qux" - assert 2 == len(store) - - -class TestSQLiteStore(StoreTests): - def create_store(self, **kwargs): - pytest.importorskip("sqlite3") - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStore(path, **kwargs) - return store - - def test_underscore_in_name(self): - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStore(path) - store["a"] = b"aaa" - store["a_b"] = b"aa_bb" - store.rmdir("a") - assert "a_b" in store - - -class TestSQLiteStoreInMemory(TestSQLiteStore): - def create_store(self, **kwargs): - pytest.importorskip("sqlite3") - store = 
SQLiteStore(":memory:", **kwargs) - return store - - def test_pickle(self): - # setup store - store = self.create_store() - store[self.root + "foo"] = b"bar" - store[self.root + "baz"] = b"quux" - - # round-trip through pickle - with pytest.raises(PicklingError): - pickle.dumps(store) - - -@skip_test_env_var("ZARR_TEST_MONGO") -class TestMongoDBStore(StoreTests): - def create_store(self, **kwargs): - pytest.importorskip("pymongo") - store = MongoDBStore( - host="127.0.0.1", database="zarr_tests", collection="zarr_tests", **kwargs - ) - # start with an empty store - store.clear() - return store - - -@skip_test_env_var("ZARR_TEST_REDIS") -class TestRedisStore(StoreTests): - def create_store(self, **kwargs): - # TODO: this is the default host for Redis on Travis, - # we probably want to generalize this though - pytest.importorskip("redis") - store = RedisStore(host="localhost", port=6379, **kwargs) - # start with an empty store - store.clear() - return store - - -class TestLRUStoreCache(StoreTests): - CountingClass = CountingDict - LRUStoreClass = LRUStoreCache - - def create_store(self, **kwargs): - # wrapper therefore no dimension_separator argument - skip_if_nested_chunks(**kwargs) - return self.LRUStoreClass(dict(), max_size=2**27) - - def test_cache_values_no_max_size(self): - # setup store - store = self.CountingClass() - foo_key = self.root + "foo" - bar_key = self.root + "bar" - store[foo_key] = b"xxx" - store[bar_key] = b"yyy" - assert 0 == store.counter["__getitem__", foo_key] - assert 1 == store.counter["__setitem__", foo_key] - assert 0 == store.counter["__getitem__", bar_key] - assert 1 == store.counter["__setitem__", bar_key] - - # setup cache - cache = self.LRUStoreClass(store, max_size=None) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first __getitem__, cache miss - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 1 == store.counter["__setitem__", foo_key] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second __getitem__, cache hit - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 1 == store.counter["__setitem__", foo_key] - assert 1 == cache.hits - assert 1 == cache.misses - - # test __setitem__, __getitem__ - cache[foo_key] = b"zzz" - assert 1 == store.counter["__getitem__", foo_key] - assert 2 == store.counter["__setitem__", foo_key] - # should be a cache hit - assert b"zzz" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 2 == store.counter["__setitem__", foo_key] - assert 2 == cache.hits - assert 1 == cache.misses - - # manually invalidate all cached values - cache.invalidate_values() - assert b"zzz" == cache[foo_key] - assert 2 == store.counter["__getitem__", foo_key] - assert 2 == store.counter["__setitem__", foo_key] - cache.invalidate() - assert b"zzz" == cache[foo_key] - assert 3 == store.counter["__getitem__", foo_key] - assert 2 == store.counter["__setitem__", foo_key] - - # test __delitem__ - del cache[foo_key] - with pytest.raises(KeyError): - # noinspection PyStatementEffect - cache[foo_key] - with pytest.raises(KeyError): - # noinspection PyStatementEffect - store[foo_key] - - # verify other keys untouched - assert 0 == store.counter["__getitem__", bar_key] - assert 1 == store.counter["__setitem__", bar_key] - - def test_cache_values_with_max_size(self): - # setup store - store = self.CountingClass() - foo_key = self.root + "foo" - bar_key = self.root + "bar" - store[foo_key] = b"xxx" - store[bar_key] = b"yyy" 
- assert 0 == store.counter["__getitem__", foo_key] - assert 0 == store.counter["__getitem__", bar_key] - # setup cache - can only hold one item - cache = self.LRUStoreClass(store, max_size=5) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first 'foo' __getitem__, cache miss - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second 'foo' __getitem__, cache hit - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 1 == cache.hits - assert 1 == cache.misses - - # test first 'bar' __getitem__, cache miss - assert b"yyy" == cache[bar_key] - assert 1 == store.counter["__getitem__", bar_key] - assert 1 == cache.hits - assert 2 == cache.misses - - # test second 'bar' __getitem__, cache hit - assert b"yyy" == cache[bar_key] - assert 1 == store.counter["__getitem__", bar_key] - assert 2 == cache.hits - assert 2 == cache.misses - - # test 'foo' __getitem__, should have been evicted, cache miss - assert b"xxx" == cache[foo_key] - assert 2 == store.counter["__getitem__", foo_key] - assert 2 == cache.hits - assert 3 == cache.misses - - # test 'bar' __getitem__, should have been evicted, cache miss - assert b"yyy" == cache[bar_key] - assert 2 == store.counter["__getitem__", bar_key] - assert 2 == cache.hits - assert 4 == cache.misses - - # setup store - store = self.CountingClass() - store[foo_key] = b"xxx" - store[bar_key] = b"yyy" - assert 0 == store.counter["__getitem__", foo_key] - assert 0 == store.counter["__getitem__", bar_key] - # setup cache - can hold two items - cache = self.LRUStoreClass(store, max_size=6) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first 'foo' __getitem__, cache miss - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second 'foo' __getitem__, cache hit - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 1 == cache.hits - assert 1 == cache.misses - - # test first 'bar' __getitem__, cache miss - assert b"yyy" == cache[bar_key] - assert 1 == store.counter["__getitem__", bar_key] - assert 1 == cache.hits - assert 2 == cache.misses - - # test second 'bar' __getitem__, cache hit - assert b"yyy" == cache[bar_key] - assert 1 == store.counter["__getitem__", bar_key] - assert 2 == cache.hits - assert 2 == cache.misses - - # test 'foo' __getitem__, should still be cached - assert b"xxx" == cache[foo_key] - assert 1 == store.counter["__getitem__", foo_key] - assert 3 == cache.hits - assert 2 == cache.misses - - # test 'bar' __getitem__, should still be cached - assert b"yyy" == cache[bar_key] - assert 1 == store.counter["__getitem__", bar_key] - assert 4 == cache.hits - assert 2 == cache.misses - - def test_cache_keys(self): - # setup - store = self.CountingClass() - foo_key = self.root + "foo" - bar_key = self.root + "bar" - baz_key = self.root + "baz" - store[foo_key] = b"xxx" - store[bar_key] = b"yyy" - assert 0 == store.counter["__contains__", foo_key] - assert 0 == store.counter["__iter__"] - assert 0 == store.counter["keys"] - cache = self.LRUStoreClass(store, max_size=None) - - # keys should be cached on first call - keys = sorted(cache.keys()) - assert keys == [bar_key, foo_key] - assert 1 == store.counter["keys"] - # keys should now be cached - assert keys == sorted(cache.keys()) - assert 1 == store.counter["keys"] - assert foo_key in cache - assert 1 == 
store.counter["__contains__", foo_key] - # the next check for `foo_key` is cached - assert foo_key in cache - assert 1 == store.counter["__contains__", foo_key] - assert keys == sorted(cache) - assert 0 == store.counter["__iter__"] - assert 1 == store.counter["keys"] - - # cache should be cleared if store is modified - crude but simple for now - cache[baz_key] = b"zzz" - keys = sorted(cache.keys()) - assert keys == [bar_key, baz_key, foo_key] - assert 2 == store.counter["keys"] - # keys should now be cached - assert keys == sorted(cache.keys()) - assert 2 == store.counter["keys"] - - # manually invalidate keys - cache.invalidate_keys() - keys = sorted(cache.keys()) - assert keys == [bar_key, baz_key, foo_key] - assert 3 == store.counter["keys"] - assert 1 == store.counter["__contains__", foo_key] - assert 0 == store.counter["__iter__"] - cache.invalidate_keys() - keys = sorted(cache) - assert keys == [bar_key, baz_key, foo_key] - assert 4 == store.counter["keys"] - assert 1 == store.counter["__contains__", foo_key] - assert 0 == store.counter["__iter__"] - cache.invalidate_keys() - assert foo_key in cache - assert 4 == store.counter["keys"] - assert 2 == store.counter["__contains__", foo_key] - assert 0 == store.counter["__iter__"] - - # check these would get counted if called directly - assert foo_key in store - assert 3 == store.counter["__contains__", foo_key] - assert keys == sorted(store) - assert 1 == store.counter["__iter__"] - - -def test_getsize(): - store = KVStore(dict()) - store["foo"] = b"aaa" - store["bar"] = b"bbbb" - store["baz/quux"] = b"ccccc" - assert 7 == getsize(store) - assert 5 == getsize(store, "baz") - - store = KVStore(dict()) - store["boo"] = None - assert -1 == getsize(store) - - -@pytest.mark.parametrize("dict_store", [False, True]) -def test_migrate_1to2(dict_store): - from zarr import meta_v1 - - # N.B., version 1 did not support hierarchies, so we only have to be - # concerned about migrating a single array at the root of the store - - # setup - store = dict() if dict_store else KVStore(dict()) - meta = dict( - shape=(100,), - chunks=(10,), - dtype=np.dtype("f4"), - compression="zlib", - compression_opts=1, - fill_value=None, - order="C", - ) - meta_json = meta_v1.encode_metadata(meta) - store["meta"] = meta_json - store["attrs"] = json.dumps(dict()).encode("ascii") - - # run migration - migrate_1to2(store) - - # check results - assert "meta" not in store - assert array_meta_key in store - assert "attrs" not in store - assert attrs_key in store - meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 2 == meta_migrated["zarr_format"] - - # preserved fields - for f in "shape", "chunks", "dtype", "fill_value", "order": - assert meta[f] == meta_migrated[f] - - # migrate should have added empty filters field - assert meta_migrated["filters"] is None - - # check compression and compression_opts migrated to compressor - assert "compression" not in meta_migrated - assert "compression_opts" not in meta_migrated - assert meta_migrated["compressor"] == Zlib(1).get_config() - - # check dict compression_opts - store = dict() if dict_store else KVStore(dict()) - meta["compression"] = "blosc" - meta["compression_opts"] = dict(cname="lz4", clevel=5, shuffle=1) - meta_json = meta_v1.encode_metadata(meta) - store["meta"] = meta_json - store["attrs"] = json.dumps(dict()).encode("ascii") - migrate_1to2(store) - meta_migrated = decode_array_metadata(store[array_meta_key]) - assert "compression" not in meta_migrated - assert "compression_opts" not in meta_migrated 
- assert meta_migrated["compressor"] == Blosc(cname="lz4", clevel=5, shuffle=1).get_config() - - # check 'none' compression is migrated to None (null in JSON) - store = dict() if dict_store else KVStore(dict()) - meta["compression"] = "none" - meta_json = meta_v1.encode_metadata(meta) - store["meta"] = meta_json - store["attrs"] = json.dumps(dict()).encode("ascii") - migrate_1to2(store) - meta_migrated = decode_array_metadata(store[array_meta_key]) - assert "compression" not in meta_migrated - assert "compression_opts" not in meta_migrated - assert meta_migrated["compressor"] is None - - -def test_format_compatibility(): - # This test is intended to catch any unintended changes that break the ability to - # read data stored with a previous minor version (which should be format-compatible). - - # fixture data - fixture = group(store=DirectoryStore("fixture")) - - # set seed to get consistent random data - np.random.seed(42) - - arrays_chunks = [ - (np.arange(1111, dtype=" 2 else "" - # setup some values - store[prefix + "a"] = b"aaa" - store[prefix + "b"] = b"bbb" - store[prefix + "c/d"] = b"ddd" - store[prefix + "c/e/f"] = b"fff" - - # test iterators on store with data - assert 4 == len(store) - keys = [prefix + "a", prefix + "b", prefix + "c/d", prefix + "c/e/f"] - values = [b"aaa", b"bbb", b"ddd", b"fff"] - items = list(zip(keys, values)) - assert set(keys) == set(store) - assert set(keys) == set(store.keys()) - assert set(values) == set(store.values()) - assert set(items) == set(store.items()) - - def test_getsize(self): - return super().test_getsize() - - def test_hierarchy(self): - return super().test_hierarchy() - - @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") - def test_pickle(self): - # internal attribute on ContainerClient isn't serializable for py36 and earlier - super().test_pickle() - - -class TestConsolidatedMetadataStore: - version = 2 - ConsolidatedMetadataClass = ConsolidatedMetadataStore - - @property - def metadata_key(self): - return ".zmetadata" - - def test_bad_format(self): - # setup store with consolidated metadata - store = dict() - consolidated = { - # bad format version - "zarr_consolidated_format": 0, - } - store[self.metadata_key] = json.dumps(consolidated).encode() - - # check appropriate error is raised - with pytest.raises(MetadataError): - self.ConsolidatedMetadataClass(store) - - def test_bad_store_version(self): - with pytest.raises(ValueError): - self.ConsolidatedMetadataClass(KVStoreV3(dict())) - - def test_read_write(self): - # setup store with consolidated metadata - store = dict() - consolidated = { - "zarr_consolidated_format": 1, - "metadata": { - "foo": "bar", - "baz": 42, - }, - } - store[self.metadata_key] = json.dumps(consolidated).encode() - - # create consolidated store - cs = self.ConsolidatedMetadataClass(store) - - # test __contains__, __getitem__ - for key, value in consolidated["metadata"].items(): - assert key in cs - assert value == cs[key] - - # test __delitem__, __setitem__ - with pytest.raises(PermissionError): - del cs["foo"] - with pytest.raises(PermissionError): - cs["bar"] = 0 - with pytest.raises(PermissionError): - cs["spam"] = "eggs" - - -# standalone test we do not want to run on each store. - - -def test_fill_value_change(): - a = zarr.create((10, 10), dtype=int) - - assert a[0, 0] == 0 - - a.fill_value = 1 - - assert a[0, 0] == 1 - - assert json.loads(a.store[".zarray"])["fill_value"] == 1 - - -def test_get_hierarchy_metadata_v2(): - # v2 stores do not have hierarchy metadata (i.e. 
zarr.json) - with pytest.raises(ValueError): - _get_hierarchy_metadata(KVStore(dict)) - - -def test_normalize_store_arg(tmpdir): - with pytest.raises(ValueError): - normalize_store_arg(dict(), zarr_version=4) - - for ext, Class in [(".zip", ZipStore), (".n5", N5Store)]: - fn = tmpdir.join("store" + ext) - store = normalize_store_arg(str(fn), zarr_version=2, mode="w") - assert isinstance(store, Class) - - if have_fsspec: - import fsspec - - path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=2, mode="w") - assert isinstance(store, FSStore) - - store = normalize_store_arg(fsspec.get_mapper("file://" + path)) - assert isinstance(store, FSStore) - - -def test_meta_prefix_6853(): - fixture = pathlib.Path(zarr.__file__).resolve().parent.parent / "fixture" - meta = fixture / "meta" - if not meta.exists(): # pragma: no cover - s = DirectoryStore(str(meta), dimension_separator=".") - a = zarr.open(store=s, mode="w", shape=(2, 2), dtype=" None: - super().__init__(_type) - assert test_value == self.TEST_CONSTANT - self.test_value = test_value - - -def test_ensure_store_v3(): - class InvalidStore: - pass - - with pytest.raises(ValueError): - StoreV3._ensure_store(InvalidStore()) - - # cannot initialize with a store from a different Zarr version - with pytest.raises(ValueError): - StoreV3._ensure_store(KVStore(dict())) - - assert StoreV3._ensure_store(None) is None - - # class with all methods of a MutableMapping will become a KVStoreV3 - assert isinstance(StoreV3._ensure_store(DummyStore), KVStoreV3) - - with pytest.raises(ValueError): - # does not have the methods expected of a MutableMapping - StoreV3._ensure_store(InvalidDummyStore) - - -def test_valid_key(): - store = KVStoreV3(dict) - - # only ascii keys are valid - assert not store._valid_key(5) - assert not store._valid_key(2.8) - - for key in store._valid_key_characters: - assert store._valid_key(key) - - # other characters not in store._valid_key_characters are not allowed - assert not store._valid_key("*") - assert not store._valid_key("~") - assert not store._valid_key("^") - - -def test_validate_key(): - store = KVStoreV3(dict) - - # zarr.json is a valid key - store._validate_key("zarr.json") - # but other keys not starting with meta/ or data/ are not - with pytest.raises(ValueError): - store._validate_key("zar.json") - - # valid ascii keys - for valid in [ - meta_root + "arr1.array.json", - data_root + "arr1.array.json", - meta_root + "subfolder/item_1-0.group.json", - ]: - store._validate_key(valid) - # but otherwise valid keys cannot end in / - with pytest.raises(ValueError): - assert store._validate_key(valid + "/") - - for invalid in [0, "*", "~", "^", "&"]: - with pytest.raises(ValueError): - store._validate_key(invalid) - - -class StoreV3Tests(_StoreTests): - version = 3 - root = meta_root - - def test_getsize(self): - # TODO: determine proper getsize() behavior for v3 - # Currently returns the combined size of entries under - # meta/root/path and data/root/path. - # Any path not under meta/root/ or data/root/ (including zarr.json) - # returns size 0. 
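Before the getsize assertions that follow, the v3 key layout assumed throughout these tests is worth restating: only "zarr.json" and keys under meta/root/ or data/root/ are accepted. A short sketch, with import paths assumed and using the private validation hook exercised by test_validate_key:

from zarr._storage.v3 import KVStoreV3      # import path assumed
from zarr.storage import data_root, meta_root

store = KVStoreV3(dict())
store._validate_key("zarr.json")                      # entry-point metadata: allowed
store._validate_key(meta_root + "arr1.array.json")    # under meta/root/: allowed
store._validate_key(data_root + "arr1.array.json")    # under data/root/: allowed
# any other key, or a key ending in "/", raises ValueError (see test_validate_key)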
- - store = self.create_store() - if isinstance(store, dict) or hasattr(store, "getsize"): - assert 0 == getsize(store, "zarr.json") - store[meta_root + "foo/a"] = b"x" - assert 1 == getsize(store) - assert 1 == getsize(store, "foo") - store[meta_root + "foo/b"] = b"x" - assert 2 == getsize(store, "foo") - assert 1 == getsize(store, "foo/b") - store[meta_root + "bar/a"] = b"yy" - assert 2 == getsize(store, "bar") - store[data_root + "bar/a"] = b"zzz" - assert 5 == getsize(store, "bar") - store[data_root + "baz/a"] = b"zzz" - assert 3 == getsize(store, "baz") - assert 10 == getsize(store) - store[data_root + "quux"] = array.array("B", b"zzzz") - assert 14 == getsize(store) - assert 4 == getsize(store, "quux") - store[data_root + "spong"] = np.frombuffer(b"zzzzz", dtype="u1") - assert 19 == getsize(store) - assert 5 == getsize(store, "spong") - store.close() - - def test_init_array(self, dimension_separator_fixture_v3): - pass_dim_sep, want_dim_sep = dimension_separator_fixture_v3 - - store = self.create_store() - path = "arr1" - transformer = DummyStorageTransfomer( - "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT - ) - init_array( - store, - path=path, - shape=1000, - chunks=100, - dimension_separator=pass_dim_sep, - storage_transformers=[transformer], - ) - - # check metadata - mkey = meta_root + path + ".array.json" - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert (1000,) == meta["shape"] - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype(None) == meta["data_type"] - assert default_compressor == meta["compressor"] - assert meta["fill_value"] is None - # Missing MUST be assumed to be "/" - assert meta["chunk_grid"]["separator"] is want_dim_sep - assert len(meta["storage_transformers"]) == 1 - assert isinstance(meta["storage_transformers"][0], DummyStorageTransfomer) - assert meta["storage_transformers"][0].test_value == DummyStorageTransfomer.TEST_CONSTANT - store.close() - - def test_list_prefix(self): - store = self.create_store() - path = "arr1" - init_array(store, path=path, shape=1000, chunks=100) - - expected = [meta_root + "arr1.array.json", "zarr.json"] - assert sorted(store.list_prefix("")) == expected - - expected = [meta_root + "arr1.array.json"] - assert sorted(store.list_prefix(meta_root.rstrip("/"))) == expected - - # cannot start prefix with '/' - with pytest.raises(ValueError): - store.list_prefix(prefix="/" + meta_root.rstrip("/")) - - def test_equal(self): - store = self.create_store() - assert store == store - - def test_rename_nonexisting(self): - store = self.create_store() - if store.is_erasable(): - with pytest.raises(ValueError): - store.rename("a", "b") - else: - with pytest.raises(NotImplementedError): - store.rename("a", "b") - - def test_get_partial_values(self): - store = self.create_store() - assert store.supports_efficient_get_partial_values in [True, False] - store[data_root + "foo"] = b"abcdefg" - store[data_root + "baz"] = b"z" - assert [b"a"] == store.get_partial_values([(data_root + "foo", (0, 1))]) - assert [ - b"d", - b"b", - b"z", - b"abc", - b"defg", - b"defg", - b"g", - b"ef", - ] == store.get_partial_values( - [ - (data_root + "foo", (3, 1)), - (data_root + "foo", (1, 1)), - (data_root + "baz", (0, 1)), - (data_root + "foo", (0, 3)), - (data_root + "foo", (3, 4)), - (data_root + "foo", (3, None)), - (data_root + "foo", (-1, None)), - (data_root + "foo", (-3, 2)), - ] - ) - - def test_set_partial_values(self): - store = self.create_store() - 
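A sketch of the byte-range semantics exercised by test_get_partial_values and the test_set_partial_values body that continues below: read ranges are (offset, length) tuples where a length of None means "to the end" and negative offsets count from the end, while writes are (key, offset, bytes) tuples. Import paths are assumed.

from zarr._storage.v3 import KVStoreV3      # import path assumed
from zarr.storage import data_root

store = KVStoreV3(dict())
store[data_root + "foo"] = b"abcdefg"
assert store.get_partial_values([(data_root + "foo", (0, 3))]) == [b"abc"]
assert store.get_partial_values([(data_root + "foo", (3, None))]) == [b"defg"]
assert store.get_partial_values([(data_root + "foo", (-3, 2))]) == [b"ef"]

store.set_partial_values([(data_root + "foo", 0, b"hey")])   # overwrite in place
assert store[data_root + "foo"] == b"heydefg"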
store.supports_efficient_set_partial_values() - store[data_root + "foo"] = b"abcdefg" - store.set_partial_values([(data_root + "foo", 0, b"hey")]) - assert store[data_root + "foo"] == b"heydefg" - - store.set_partial_values([(data_root + "baz", 0, b"z")]) - assert store[data_root + "baz"] == b"z" - store.set_partial_values( - [ - (data_root + "foo", 1, b"oo"), - (data_root + "baz", 1, b"zzz"), - (data_root + "baz", 4, b"aaaa"), - (data_root + "foo", 6, b"done"), - ] - ) - assert store[data_root + "foo"] == b"hoodefdone" - assert store[data_root + "baz"] == b"zzzzaaaa" - store.set_partial_values( - [ - (data_root + "foo", -2, b"NE"), - (data_root + "baz", -5, b"q"), - ] - ) - assert store[data_root + "foo"] == b"hoodefdoNE" - assert store[data_root + "baz"] == b"zzzq" - - -class TestMappingStoreV3(StoreV3Tests): - def create_store(self, **kwargs): - return KVStoreV3(dict()) - - def test_set_invalid_content(self): - # Generic mappings support non-buffer types - pass - - -class TestMemoryStoreV3(_TestMemoryStore, StoreV3Tests): - def create_store(self, **kwargs): - skip_if_nested_chunks(**kwargs) - return MemoryStoreV3(**kwargs) - - -class TestDirectoryStoreV3(_TestDirectoryStore, StoreV3Tests): - def create_store(self, normalize_keys=False, **kwargs): - # For v3, don't have to skip if nested. - # skip_if_nested_chunks(**kwargs) - - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = DirectoryStoreV3(path, normalize_keys=normalize_keys, **kwargs) - return store - - def test_rename_nonexisting(self): - store = self.create_store() - with pytest.raises(FileNotFoundError): - store.rename(meta_root + "a", meta_root + "b") - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStoreV3(_TestFSStore, StoreV3Tests): - def create_store(self, normalize_keys=False, dimension_separator=".", path=None, **kwargs): - if path is None: - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - - store = FSStoreV3( - path, normalize_keys=normalize_keys, dimension_separator=dimension_separator, **kwargs - ) - return store - - def test_init_array(self): - store = self.create_store() - path = "arr1" - init_array(store, path=path, shape=1000, chunks=100) - - # check metadata - mkey = meta_root + path + ".array.json" - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert (1000,) == meta["shape"] - assert (100,) == meta["chunk_grid"]["chunk_shape"] - assert np.dtype(None) == meta["data_type"] - assert meta["chunk_grid"]["separator"] == "/" - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStoreV3WithKeySeparator(StoreV3Tests): - def create_store(self, normalize_keys=False, key_separator=".", **kwargs): - # Since the user is passing key_separator, that will take priority. 
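For the FSStoreV3 metadata checks above, the v3 on-store layout can be sketched independently of the store class: init_array writes the array metadata under meta/root/ with an ".array.json" suffix. Import paths are assumed; the shapes mirror the test's own assertions.

from zarr._storage.v3 import KVStoreV3      # import path assumed
from zarr.storage import init_array, meta_root

store = KVStoreV3(dict())
init_array(store, path="arr1", shape=1000, chunks=100)

mkey = meta_root + "arr1.array.json"
assert mkey in store
meta = store._metadata_class.decode_array_metadata(store[mkey])
assert meta["shape"] == (1000,)
assert meta["chunk_grid"]["chunk_shape"] == (100,)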
- skip_if_nested_chunks(**kwargs) - - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - return FSStoreV3(path, normalize_keys=normalize_keys, key_separator=key_separator) - - -# TODO: enable once N5StoreV3 has been implemented -# @pytest.mark.skipif(True, reason="N5StoreV3 not yet fully implemented") -# class TestN5StoreV3(_TestN5Store, TestDirectoryStoreV3, StoreV3Tests): - - -class TestZipStoreV3(_TestZipStore, StoreV3Tests): - ZipStoreClass = ZipStoreV3 - - def create_store(self, **kwargs): - path = mktemp(suffix=".zip") - atexit.register(os.remove, path) - store = ZipStoreV3(path, mode="w", **kwargs) - return store - - -class TestDBMStoreV3(_TestDBMStore, StoreV3Tests): - def create_store(self, dimension_separator=None): - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - # create store using default dbm implementation - store = DBMStoreV3(path, flag="n", dimension_separator=dimension_separator) - return store - - -class TestDBMStoreV3Dumb(_TestDBMStoreDumb, StoreV3Tests): - def create_store(self, **kwargs): - path = mktemp(suffix=".dumbdbm") - atexit.register(atexit_rmglob, path + "*") - - import dbm.dumb as dumbdbm - - store = DBMStoreV3(path, flag="n", open=dumbdbm.open, **kwargs) - return store - - -class TestDBMStoreV3Gnu(_TestDBMStoreGnu, StoreV3Tests): - def create_store(self, **kwargs): - gdbm = pytest.importorskip("dbm.gnu") - path = mktemp(suffix=".gdbm") # pragma: no cover - atexit.register(os.remove, path) # pragma: no cover - store = DBMStoreV3( - path, flag="n", open=gdbm.open, write_lock=False, **kwargs - ) # pragma: no cover - return store # pragma: no cover - - -class TestDBMStoreV3NDBM(_TestDBMStoreNDBM, StoreV3Tests): - def create_store(self, **kwargs): - ndbm = pytest.importorskip("dbm.ndbm") - path = mktemp(suffix=".ndbm") # pragma: no cover - atexit.register(atexit_rmglob, path + "*") # pragma: no cover - store = DBMStoreV3(path, flag="n", open=ndbm.open, **kwargs) # pragma: no cover - return store # pragma: no cover - - -class TestDBMStoreV3BerkeleyDB(_TestDBMStoreBerkeleyDB, StoreV3Tests): - def create_store(self, **kwargs): - bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStoreV3(path, flag="n", open=bsddb3.btopen, write_lock=False, **kwargs) - return store - - -class TestLMDBStoreV3(_TestLMDBStore, StoreV3Tests): - def create_store(self, **kwargs): - pytest.importorskip("lmdb") - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - buffers = True - store = LMDBStoreV3(path, buffers=buffers, **kwargs) - return store - - -class TestSQLiteStoreV3(_TestSQLiteStore, StoreV3Tests): - def create_store(self, **kwargs): - pytest.importorskip("sqlite3") - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStoreV3(path, **kwargs) - return store - - -class TestSQLiteStoreV3InMemory(_TestSQLiteStoreInMemory, StoreV3Tests): - def create_store(self, **kwargs): - pytest.importorskip("sqlite3") - store = SQLiteStoreV3(":memory:", **kwargs) - return store - - -@skip_test_env_var("ZARR_TEST_MONGO") -class TestMongoDBStoreV3(StoreV3Tests): - def create_store(self, **kwargs): - pytest.importorskip("pymongo") - store = MongoDBStoreV3( - host="127.0.0.1", database="zarr_tests", collection="zarr_tests", **kwargs - ) - # start with an empty store - store.clear() - return store - - -@skip_test_env_var("ZARR_TEST_REDIS") -class TestRedisStoreV3(StoreV3Tests): - def create_store(self, **kwargs): - # TODO: this is the 
default host for Redis on Travis, - # we probably want to generalize this though - pytest.importorskip("redis") - store = RedisStoreV3(host="localhost", port=6379, **kwargs) - # start with an empty store - store.clear() - return store - - -@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestStorageTransformerV3(TestMappingStoreV3): - def create_store(self, **kwargs): - inner_store = super().create_store(**kwargs) - dummy_transformer = DummyStorageTransfomer( - "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT - ) - sharding_transformer = ShardingStorageTransformer( - "indexed", - chunks_per_shard=2, - ) - path = "bla" - init_array( - inner_store, - path=path, - shape=1000, - chunks=100, - dimension_separator=".", - storage_transformers=[dummy_transformer, sharding_transformer], - ) - store = Array(store=inner_store, path=path).chunk_store - store.erase_prefix("data/root/bla/") - store.clear() - return store - - def test_method_forwarding(self): - store = self.create_store() - inner_store = store.inner_store.inner_store - assert store.list() == inner_store.list() - assert store.list_dir(data_root) == inner_store.list_dir(data_root) - - assert store.is_readable() - assert store.is_writeable() - assert store.is_listable() - inner_store._readable = False - inner_store._writeable = False - inner_store._listable = False - assert not store.is_readable() - assert not store.is_writeable() - assert not store.is_listable() - - -class TestLRUStoreCacheV3(_TestLRUStoreCache, StoreV3Tests): - CountingClass = CountingDictV3 - LRUStoreClass = LRUStoreCacheV3 - - -@skip_test_env_var("ZARR_TEST_ABS") -class TestABSStoreV3(_TestABSStore, StoreV3Tests): - ABSStoreClass = ABSStoreV3 - - -def test_normalize_store_arg_v3(tmpdir): - fn = tmpdir.join("store.zip") - store = normalize_store_arg(str(fn), zarr_version=3, mode="w") - assert isinstance(store, ZipStoreV3) - assert "zarr.json" in store - - # can't pass storage_options to non-fsspec store - with pytest.raises(ValueError): - normalize_store_arg(str(fn), zarr_version=3, mode="w", storage_options={"some": "kwargs"}) - - if have_fsspec: - import fsspec - - path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=3, mode="w") - assert isinstance(store, FSStoreV3) - assert "zarr.json" in store - - store = normalize_store_arg(fsspec.get_mapper("file://" + path), zarr_version=3) - assert isinstance(store, FSStoreV3) - - # regression for https://github.com/zarr-developers/zarr-python/issues/1382 - # contents of zarr.json are not important for this test - out = {"version": 1, "refs": {"zarr.json": "{...}"}} - store = normalize_store_arg( - "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_version=3 - ) - assert isinstance(store, FSStoreV3) - - fn = tmpdir.join("store.n5") - with pytest.raises(NotImplementedError): - normalize_store_arg(str(fn), zarr_version=3, mode="w") - - # error on zarr_version=3 with a v2 store - with pytest.raises(ValueError): - normalize_store_arg(KVStore(dict()), zarr_version=3, mode="w") - - # error on zarr_version=2 with a v3 store - with pytest.raises(ValueError): - normalize_store_arg(KVStoreV3(dict()), zarr_version=2, mode="w") - - -class TestConsolidatedMetadataStoreV3(_TestConsolidatedMetadataStore): - version = 3 - ConsolidatedMetadataClass = ConsolidatedMetadataStoreV3 - - @property - def metadata_key(self): - return meta_root + "consolidated/.zmetadata" - - def test_bad_store_version(self): - with pytest.raises(ValueError): - 
self.ConsolidatedMetadataClass(KVStore(dict())) - - -def test_get_hierarchy_metadata(): - store = KVStoreV3({}) - - # error raised if 'jarr.json' is not in the store - with pytest.raises(ValueError): - _get_hierarchy_metadata(store) - - store["zarr.json"] = _default_entry_point_metadata_v3 - assert _get_hierarchy_metadata(store) == _default_entry_point_metadata_v3 - - # ValueError if only a subset of keys are present - store["zarr.json"] = {"zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0"} - with pytest.raises(ValueError): - _get_hierarchy_metadata(store) - - # ValueError if any unexpected keys are present - extra_metadata = copy.copy(_default_entry_point_metadata_v3) - extra_metadata["extra_key"] = "value" - store["zarr.json"] = extra_metadata - with pytest.raises(ValueError): - _get_hierarchy_metadata(store) - - -def test_top_level_imports(): - for store_name in [ - "ABSStoreV3", - "DBMStoreV3", - "KVStoreV3", - "DirectoryStoreV3", - "LMDBStoreV3", - "LRUStoreCacheV3", - "MemoryStoreV3", - "MongoDBStoreV3", - "RedisStoreV3", - "SQLiteStoreV3", - "ZipStoreV3", - ]: - if v3_api_available: - assert hasattr(zarr, store_name) # pragma: no cover - else: - assert not hasattr(zarr, store_name) # pragma: no cover - - -def test_assert_zarr_v3_api_available_warns_once(): - import zarr._storage.store - - zarr._storage.store._has_warned_about_v3 = False - warnings.resetwarnings() - with pytest.warns() as record: - assert_zarr_v3_api_available() - assert_zarr_v3_api_available() - assert len(record) == 1 - assert "The experimental Zarr V3 implementation" in str(record[0].message) - - -def _get_public_and_dunder_methods(some_class): - return set( - name - for name, _ in inspect.getmembers(some_class, predicate=inspect.isfunction) - if not name.startswith("_") or name.startswith("__") - ) - - -def test_storage_transformer_interface(): - store_v3_methods = _get_public_and_dunder_methods(StoreV3) - store_v3_methods.discard("__init__") - # Note, getitems() isn't mandatory when get_partial_values() is available - store_v3_methods.discard("getitems") - storage_transformer_methods = _get_public_and_dunder_methods(StorageTransformer) - storage_transformer_methods.discard("__init__") - storage_transformer_methods.discard("get_config") - assert storage_transformer_methods == store_v3_methods diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py deleted file mode 100644 index 3d8ef3a9b7..0000000000 --- a/zarr/tests/test_sync.py +++ /dev/null @@ -1,321 +0,0 @@ -import atexit -import shutil -import tempfile -from multiprocessing import Pool as ProcessPool -from multiprocessing import cpu_count -from multiprocessing.pool import ThreadPool -from tempfile import mkdtemp - -import numpy as np -from numpy.testing import assert_array_equal - -from zarr.attrs import Attributes -from zarr.core import Array -from zarr.hierarchy import Group -from zarr.storage import DirectoryStore, KVStore, atexit_rmtree, init_array, init_group, meta_root -from zarr.sync import ProcessSynchronizer, ThreadSynchronizer - -# zarr_version fixture must be imported although not used directly here -from zarr.tests.test_attrs import TestAttributes, zarr_version # noqa -from zarr.tests.test_core import TestArray -from zarr.tests.test_hierarchy import TestGroup - - -class TestAttributesWithThreadSynchronizer(TestAttributes): - def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): - key = ".zattrs" if zarr_version == 2 else meta_root + "attrs" - synchronizer = ThreadSynchronizer() - return 
Attributes( - store, synchronizer=synchronizer, key=key, read_only=read_only, cache=cache - ) - - -class TestAttributesProcessSynchronizer(TestAttributes): - def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): - key = ".zattrs" if zarr_version == 2 else meta_root + "attrs" - sync_path = mkdtemp() - atexit.register(shutil.rmtree, sync_path) - synchronizer = ProcessSynchronizer(sync_path) - return Attributes( - store, synchronizer=synchronizer, key=key, read_only=read_only, cache=cache - ) - - -def _append(arg): - z, i = arg - import numpy - - x = numpy.empty(1000, dtype="i4") - x[:] = i - shape = z.append(x) - return shape - - -def _set_arange(arg): - z, i = arg - import numpy - - x = numpy.arange(i * 1000, (i * 1000) + 1000, 1) - z[i * 1000 : (i * 1000) + 1000] = x - return i - - -class MixinArraySyncTests: - def test_parallel_setitem(self): - n = 100 - - # setup - arr = self.create_array(shape=n * 1000, chunks=999, dtype="i4") - arr[:] = 0 - pool = self.create_pool() - - # parallel setitem - results = pool.map(_set_arange, zip([arr] * n, range(n)), chunksize=1) - results = sorted(results) - - assert list(range(n)) == results - assert_array_equal(np.arange(n * 1000), arr[:]) - - pool.terminate() - - def test_parallel_append(self): - n = 100 - - # setup - arr = self.create_array(shape=1000, chunks=999, dtype="i4") - arr[:] = 0 - pool = self.create_pool() - - # parallel append - results = pool.map(_append, zip([arr] * n, range(n)), chunksize=1) - results = sorted(results) - - assert [((i + 2) * 1000,) for i in range(n)] == results - assert ((n + 1) * 1000,) == arr.shape - - pool.terminate() - - -class TestArrayWithThreadSynchronizer(TestArray, MixinArraySyncTests): - def create_array(self, read_only=False, **kwargs): - store = KVStore(dict()) - cache_metadata = kwargs.pop("cache_metadata", True) - cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop("write_empty_chunks", True) - init_array(store, **kwargs) - return Array( - store, - synchronizer=ThreadSynchronizer(), - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks, - ) - - # noinspection PyMethodMayBeStatic - def create_pool(self): - pool = ThreadPool(cpu_count()) - return pool - - def test_hexdigest(self): - # Check basic 1-D array - z = self.create_array(shape=(1050,), chunks=100, dtype="" == actual[-8:] - - -def test_tree_get_icon(): - assert tree_get_icon("Array") == tree_array_icon - assert tree_get_icon("Group") == tree_group_icon - with pytest.raises(ValueError): - tree_get_icon("Baz") - - -@mock.patch.dict("sys.modules", {"ipytree": None}) -def test_tree_widget_missing_ipytree(): - pattern = ( - "Run `pip install zarr[jupyter]` or `conda install ipytree`" - "to get the required ipytree dependency for displaying the tree " - "widget. If using jupyterlab<3, you also need to run " - "`jupyter labextension install ipytree`" - ) - with pytest.raises(ImportError, match=re.escape(pattern)): - tree_widget(None, None, None) - - -def test_retry_call(): - class Fixture: - def __init__(self, pass_on=1): - self.c = 0 - self.pass_on = pass_on - - def __call__(self): - self.c += 1 - if self.c != self.pass_on: - raise PermissionError() - - for x in range(1, 11): - # Any number of failures less than 10 will be accepted. - fixture = Fixture(pass_on=x) - retry_call(fixture, exceptions=(PermissionError,), wait=0) - assert fixture.c == x - - def fail(x): - # Failures after 10 will cause an error to be raised. 
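The retry semantics checked by test_retry_call above amount to: keep calling until the callable stops raising one of the listed exceptions, up to a bounded number of attempts. A minimal sketch, assuming retry_call is importable from zarr.util as the test module does:

from zarr.util import retry_call

attempts = {"n": 0}

def flaky():
    attempts["n"] += 1
    if attempts["n"] < 3:          # fail on the first two calls only
        raise PermissionError()

retry_call(flaky, exceptions=(PermissionError,), wait=0)
assert attempts["n"] == 3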
- retry_call(Fixture(pass_on=x), exceptions=(Exception,), wait=0) - - for x in range(11, 15): - pytest.raises(PermissionError, fail, x) - - -def test_flatten(): - assert list( - flatten( - [ - "0", - [ - "1", - [ - "2", - [ - "3", - [ - 4, - ], - ], - ], - ], - ] - ) - ) == ["0", "1", "2", "3", 4] - assert list(flatten("foo")) == ["f", "o", "o"] - assert list(flatten(["foo"])) == ["foo"] - - -def test_all_equal(): - assert all_equal(0, np.zeros((10, 10, 10))) - assert not all_equal(1, np.zeros((10, 10, 10))) - - assert all_equal(1, np.ones((10, 10, 10))) - assert not all_equal(1, 1 + np.ones((10, 10, 10))) - - assert all_equal(np.nan, np.array([np.nan, np.nan])) - assert not all_equal(np.nan, np.array([np.nan, 1.0])) - - assert all_equal({"a": -1}, np.array([{"a": -1}, {"a": -1}], dtype="object")) - assert not all_equal({"a": -1}, np.array([{"a": -1}, {"a": 2}], dtype="object")) - - assert all_equal(np.timedelta64(999, "D"), np.array([999, 999], dtype="timedelta64[D]")) - assert not all_equal(np.timedelta64(999, "D"), np.array([999, 998], dtype="timedelta64[D]")) - - # all_equal(None, *) always returns False - assert not all_equal(None, np.array([None, None])) - assert not all_equal(None, np.array([None, 10])) - - -def test_json_dumps_numpy_dtype(): - assert json_dumps(np.int64(0)) == json_dumps(0) - assert json_dumps(np.float32(0)) == json_dumps(float(0)) - # Check that we raise the error of the superclass for unsupported object - with pytest.raises(TypeError): - json_dumps(Array) - - -def test_constant_map(): - val = object() - m = ConstantMap(keys=[1, 2], constant=val) - assert len(m) == 2 - assert m[1] is val - assert m[2] is val - assert 1 in m - assert 0 not in m - with pytest.raises(KeyError): - m[0] - assert repr(m) == repr({1: val, 2: val}) diff --git a/zarr/tests/util.py b/zarr/tests/util.py deleted file mode 100644 index b3c3249cab..0000000000 --- a/zarr/tests/util.py +++ /dev/null @@ -1,120 +0,0 @@ -import collections -import os -import tempfile -from typing import Any, Mapping, Sequence -from zarr.context import Context - -from zarr.storage import Store -from zarr._storage.v3 import StoreV3 - -import pytest - - -class CountingDict(Store): - def __init__(self): - self.wrapped = dict() - self.counter = collections.Counter() - - def __len__(self): - self.counter["__len__"] += 1 - return len(self.wrapped) - - def keys(self): - self.counter["keys"] += 1 - return self.wrapped.keys() - - def __iter__(self): - self.counter["__iter__"] += 1 - return iter(self.wrapped) - - def __contains__(self, item): - self.counter["__contains__", item] += 1 - return item in self.wrapped - - def __getitem__(self, item): - self.counter["__getitem__", item] += 1 - return self.wrapped[item] - - def __setitem__(self, key, value): - self.counter["__setitem__", key] += 1 - self.wrapped[key] = value - - def __delitem__(self, key): - self.counter["__delitem__", key] += 1 - del self.wrapped[key] - - def getitems( - self, keys: Sequence[str], *, contexts: Mapping[str, Context] - ) -> Mapping[str, Any]: - for key in keys: - self.counter["__getitem__", key] += 1 - return {k: self.wrapped[k] for k in keys if k in self.wrapped} - - -class CountingDictV3(CountingDict, StoreV3): - pass - - -def skip_test_env_var(name): - """Checks for environment variables indicating whether tests requiring services should be run""" - value = os.environ.get(name, "0") - return pytest.mark.skipif(value == "0", reason="Tests not enabled via environment variable") - - -try: - import fsspec # noqa: F401 - - have_fsspec = True -except 
ImportError: # pragma: no cover - have_fsspec = False - - -try: - import bsddb3 # noqa: F401 - - have_bsddb3 = True -except ImportError: # pragma: no cover - have_bsddb3 = False - - -try: - import lmdb # noqa: F401 - - have_lmdb = True -except ImportError: # pragma: no cover - have_lmdb = False - - -try: - import sqlite3 # noqa: F401 - - have_sqlite3 = True -except ImportError: # pragma: no cover - have_sqlite3 = False - - -def abs_container(): - from azure.core.exceptions import ResourceExistsError - import azure.storage.blob as asb - - URL = "http://127.0.0.1:10000" - ACCOUNT_NAME = "devstoreaccount1" - KEY = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" - CONN_STR = ( - f"DefaultEndpointsProtocol=http;AccountName={ACCOUNT_NAME};" - f"AccountKey={KEY};BlobEndpoint={URL}/{ACCOUNT_NAME};" - ) - - blob_service_client = asb.BlobServiceClient.from_connection_string(CONN_STR) - try: - container_client = blob_service_client.create_container("test") - except ResourceExistsError: - container_client = blob_service_client.get_container_client("test") - - return container_client - - -def mktemp(**kwargs): - f = tempfile.NamedTemporaryFile(**kwargs) - f.close() - return f.name diff --git a/zarr/types.py b/zarr/types.py deleted file mode 100644 index cc29a350f5..0000000000 --- a/zarr/types.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Literal, Protocol, Union - -ZARR_VERSION = Literal[2, 3] -DIMENSION_SEPARATOR = Literal[".", "/"] -MEMORY_ORDER = Literal["C", "F"] - - -PathLike = Union[str, bytes, None] - - -class MetaArray(Protocol): - def __array_function__(self, func, types, args, kwargs): - # To be extended - ... diff --git a/zarr/util.py b/zarr/util.py deleted file mode 100644 index 8a96f92c24..0000000000 --- a/zarr/util.py +++ /dev/null @@ -1,790 +0,0 @@ -import inspect -import json -import math -import numbers -from textwrap import TextWrapper -import mmap -import time -from typing import ( - Any, - Callable, - Dict, - Iterator, - Mapping, - Optional, - Tuple, - TypeVar, - Union, - Iterable, - cast, -) - -import numpy as np -from asciitree import BoxStyle, LeftAligned -from asciitree.traversal import Traversal -from numcodecs.compat import ( - ensure_text, - ensure_ndarray_like, - ensure_bytes, - ensure_contiguous_ndarray_like, -) -from numcodecs.ndarray_like import NDArrayLike -from numcodecs.registry import codec_registry -from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -from zarr.types import DIMENSION_SEPARATOR - -KeyType = TypeVar("KeyType") -ValueType = TypeVar("ValueType") - - -def flatten(arg: Iterable) -> Iterable: - for element in arg: - if isinstance(element, Iterable) and not isinstance(element, (str, bytes)): - yield from flatten(element) - else: - yield element - - -# codecs to use for object dtype convenience API -object_codecs = { - str.__name__: "vlen-utf8", - bytes.__name__: "vlen-bytes", - "array": "vlen-array", -} - - -class NumberEncoder(json.JSONEncoder): - def default(self, o): - # See json.JSONEncoder.default docstring for explanation - # This is necessary to encode numpy dtype - if isinstance(o, numbers.Integral): - return int(o) - if isinstance(o, numbers.Real): - return float(o) - return json.JSONEncoder.default(self, o) - - -def json_dumps(o: Any) -> bytes: - """Write JSON in a consistent, human-readable way.""" - return json.dumps( - o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder - ).encode("ascii") - - -def json_loads(s: Union[bytes, str]) -> Dict[str, Any]: 
- """Read JSON in a consistent way.""" - return json.loads(ensure_text(s, "utf-8")) - - -def normalize_shape(shape: Union[int, Tuple[int, ...], None]) -> Tuple[int, ...]: - """Convenience function to normalize the `shape` argument.""" - - if shape is None: - raise TypeError("shape is None") - - # handle 1D convenience form - if isinstance(shape, numbers.Integral): - shape = (int(shape),) - - # normalize - shape = cast(Tuple[int, ...], shape) - shape = tuple(int(s) for s in shape) - return shape - - -# code to guess chunk shape, adapted from h5py - -CHUNK_BASE = 256 * 1024 # Multiplier by which chunks are adjusted -CHUNK_MIN = 128 * 1024 # Soft lower limit (128k) -CHUNK_MAX = 64 * 1024 * 1024 # Hard upper limit - - -def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: - """ - Guess an appropriate chunk layout for an array, given its shape and - the size of each element in bytes. Will allocate chunks only as large - as MAX_SIZE. Chunks are generally close to some power-of-2 fraction of - each axis, slightly favoring bigger values for the last index. - Undocumented and subject to change without warning. - """ - - ndims = len(shape) - # require chunks to have non-zero length for all dimensions - chunks = np.maximum(np.array(shape, dtype="=f8"), 1) - - # Determine the optimal chunk size in bytes using a PyTables expression. - # This is kept as a float. - dset_size = np.prod(chunks) * typesize - target_size = CHUNK_BASE * (2 ** np.log10(dset_size / (1024.0 * 1024))) - - if target_size > CHUNK_MAX: - target_size = CHUNK_MAX - elif target_size < CHUNK_MIN: - target_size = CHUNK_MIN - - idx = 0 - while True: - # Repeatedly loop over the axes, dividing them by 2. Stop when: - # 1a. We're smaller than the target chunk size, OR - # 1b. We're within 50% of the target chunk size, AND - # 2. 
The chunk is smaller than the maximum chunk size
-
-        chunk_bytes = np.prod(chunks) * typesize
-
-        if (
-            chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5
-        ) and chunk_bytes < CHUNK_MAX:
-            break
-
-        if np.prod(chunks) == 1:
-            break  # Element size larger than CHUNK_MAX
-
-        chunks[idx % ndims] = math.ceil(chunks[idx % ndims] / 2.0)
-        idx += 1
-
-    return tuple(int(x) for x in chunks)
-
-
-def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]:
-    """Convenience function to normalize the `chunks` argument for an array
-    with the given `shape`."""
-
-    # N.B., expect shape already normalized
-
-    # handle auto-chunking
-    if chunks is None or chunks is True:
-        return guess_chunks(shape, typesize)
-
-    # handle no chunking
-    if chunks is False:
-        return shape
-
-    # handle 1D convenience form
-    if isinstance(chunks, numbers.Integral):
-        chunks = tuple(int(chunks) for _ in shape)
-
-    # handle bad dimensionality
-    if len(chunks) > len(shape):
-        raise ValueError("too many dimensions in chunks")
-
-    # handle underspecified chunks
-    if len(chunks) < len(shape):
-        # assume chunks across remaining dimensions
-        chunks += shape[len(chunks) :]
-
-    # handle None or -1 in chunks
-    if -1 in chunks or None in chunks:
-        chunks = tuple(s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks))
-
-    chunks = tuple(int(c) for c in chunks)
-    return chunks
-
-
-def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]:
-    # convenience API for object arrays
-    if inspect.isclass(dtype):
-        dtype = dtype.__name__
-    if isinstance(dtype, str):
-        # allow ':' to delimit class from codec arguments
-        tokens = dtype.split(":")
-        key = tokens[0]
-        if key in object_codecs:
-            dtype = np.dtype(object)
-            if object_codec is None:
-                codec_id = object_codecs[key]
-                if len(tokens) > 1:
-                    args = tokens[1].split(",")
-                else:
-                    args = []
-                try:
-                    object_codec = codec_registry[codec_id](*args)
-                except KeyError as e:  # pragma: no cover
-                    raise ValueError(
-                        f"codec {codec_id!r} for object type {key!r} is not "
-                        f"available; please provide an object_codec manually"
-                    ) from e
-            return dtype, object_codec
-
-    dtype = np.dtype(dtype)
-
-    # don't allow generic datetime64 or timedelta64, require units to be specified
-    if dtype == np.dtype("M8") or dtype == np.dtype("m8"):
-        raise ValueError(
-            "datetime64 and timedelta64 dtypes with generic units "
-            'are not supported, please specify units (e.g., "M8[ns]")'
-        )
-
-    return dtype, object_codec
-
-
-# noinspection PyTypeChecker
-def is_total_slice(item, shape: Tuple[int]) -> bool:
-    """Determine whether `item` specifies a complete slice of array with the
-    given `shape`. Used to optimize __setitem__ operations on the Chunk
-    class."""
-
-    # N.B., assume shape is normalized
-
-    if item == Ellipsis:
-        return True
-    if item == slice(None):
-        return True
-    if isinstance(item, slice):
-        item = (item,)
-    if isinstance(item, tuple):
-        return all(
-            (
-                (
-                    isinstance(it, slice)
-                    and (
-                        (it == slice(None))
-                        or ((it.stop - it.start == sh) and (it.step in [1, None]))
-                    )
-                )
-                # The only scalar edge case, indexing with int 0 along a size-1 dimension
-                # is identical to a total slice
-                # https://github.com/zarr-developers/zarr-python/issues/1730
-                or (isinstance(it, int) and it == 0 and sh == 1)
-            )
-            for it, sh in zip(item, shape)
-        )
-    else:
-        raise TypeError(f"expected slice or tuple of slices, found {item!r}")
-
-
-def normalize_resize_args(old_shape, *args):
-    # normalize new shape argument
-    if len(args) == 1:
-        new_shape = args[0]
-    else:
-        new_shape = args
-    if isinstance(new_shape, int):
-        new_shape = (new_shape,)
-    else:
-        new_shape = tuple(new_shape)
-    if len(new_shape) != len(old_shape):
-        raise ValueError("new shape must have same number of dimensions")
-
-    # handle None in new_shape
-    new_shape = tuple(s if n is None else int(n) for s, n in zip(old_shape, new_shape))
-
-    return new_shape
-
-
-def human_readable_size(size) -> str:
-    if size < 2**10:
-        return f"{size}"
-    elif size < 2**20:
-        return f"{size / float(2**10):.1f}K"
-    elif size < 2**30:
-        return f"{size / float(2**20):.1f}M"
-    elif size < 2**40:
-        return f"{size / float(2**30):.1f}G"
-    elif size < 2**50:
-        return f"{size / float(2**40):.1f}T"
-    else:
-        return f"{size / float(2**50):.1f}P"
-
-
-def normalize_order(order: str) -> str:
-    order = str(order).upper()
-    if order not in ["C", "F"]:
-        raise ValueError(f"order must be either 'C' or 'F', found: {order!r}")
-    return order
-
-
-def normalize_dimension_separator(sep: Optional[str]) -> Optional[DIMENSION_SEPARATOR]:
-    if sep in (".", "/", None):
-        return cast(Optional[DIMENSION_SEPARATOR], sep)
-    else:
-        raise ValueError(f"dimension_separator must be either '.' or '/', found: {sep!r}")
-
-
-def normalize_fill_value(fill_value, dtype: np.dtype):
-    if fill_value is None or dtype.hasobject:
-        # no fill value
-        pass
-    elif not isinstance(fill_value, np.void) and fill_value == 0:
-        # this should be compatible across numpy versions for any array type, including
-        # structured arrays
-        fill_value = np.zeros((), dtype=dtype)[()]
-
-    elif dtype.kind == "U":
-        # special case unicode because of encoding issues on Windows if passed through numpy
-        # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713
-
-        if not isinstance(fill_value, str):
-            raise ValueError(
-                f"fill_value {fill_value!r} is not valid for dtype {dtype}; "
-                f"must be a unicode string"
-            )
-
-    else:
-        try:
-            if isinstance(fill_value, bytes) and dtype.kind == "V":
-                # special case for numpy 1.14 compatibility
-                fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()]
-            else:
-                fill_value = np.array(fill_value, dtype=dtype)[()]
-
-        except Exception as e:
-            # re-raise with our own error message to be helpful
-            raise ValueError(
-                f"fill_value {fill_value!r} is not valid for dtype {dtype}; "
-                f"nested exception: {e}"
-            ) from e
-
-    return fill_value
-
-
-def normalize_storage_path(path: Union[str, bytes, None]) -> str:
-    # handle bytes
-    if isinstance(path, bytes):
-        path = str(path, "ascii")
-
-    # ensure str
-    if path is not None and not isinstance(path, str):
-        path = str(path)
-
-    if path:
-        # convert backslash to forward slash
-        path = path.replace("\\", "/")
-
-        # ensure no leading slash
-        while len(path) > 0 and path[0] == "/":
-            path = path[1:]
-
-        # ensure no trailing slash
-        while len(path) > 0 and path[-1] == "/":
-            path = path[:-1]
-
-        # collapse any repeated slashes
-        previous_char = None
-        collapsed = ""
-        for char in path:
-            if char == "/" and previous_char == "/":
-                pass
-            else:
-                collapsed += char
-            previous_char = char
-        path = collapsed
-
-        # don't allow path segments with just '.' or '..'
-        segments = path.split("/")
-        if any(s in {".", ".."} for s in segments):
-            raise ValueError("path containing '.' or '..' segment not allowed")
-
-    else:
-        path = ""
-
-    return path
-
-
-def buffer_size(v) -> int:
-    return ensure_ndarray_like(v).nbytes
-
-
-def info_text_report(items: Dict[Any, Any]) -> str:
-    keys = [k for k, v in items]
-    max_key_len = max(len(k) for k in keys)
-    report = ""
-    for k, v in items:
-        wrapper = TextWrapper(
-            width=80,
-            initial_indent=k.ljust(max_key_len) + " : ",
-            subsequent_indent=" " * max_key_len + " : ",
-        )
-        text = wrapper.fill(str(v))
-        report += text + "\n"
-    return report
-
-
-def info_html_report(items) -> str:
-    report = '<table class="zarr-info">'
-    report += "<tbody>"
-    for k, v in items:
-        report += (
-            f"<tr>"
-            f'<th style="text-align: left">{k}</th>'
-            f'<td style="text-align: left">{v}</td>'
-            f"</tr>"
-        )
-    report += "</tbody>"
-    report += "</table>"
-    return report
-
-
-class InfoReporter:
-    def __init__(self, obj):
-        self.obj = obj
-        self.items = self.obj.info_items()
-
-    def __repr__(self):
-        return info_text_report(self.items)
-
-    def _repr_html_(self):
-        return info_html_report(self.items)
-
-
-class TreeNode:
-    def __init__(self, obj, depth=0, level=None):
-        self.obj = obj
-        self.depth = depth
-        self.level = level
-
-    def get_children(self):
-        if hasattr(self.obj, "values"):
-            if self.level is None or self.depth < self.level:
-                depth = self.depth + 1
-                return [TreeNode(o, depth=depth, level=self.level) for o in self.obj.values()]
-        return []
-
-    def get_text(self):
-        name = self.obj.name.split("/")[-1] or "/"
-        if hasattr(self.obj, "shape"):
-            name += f" {self.obj.shape} {self.obj.dtype}"
-        return name
-
-    def get_type(self):
-        return type(self.obj).__name__
-
-
-class TreeTraversal(Traversal):
-    def get_children(self, node):
-        return node.get_children()
-
-    def get_root(self, tree):
-        return tree
-
-    def get_text(self, node):
-        return node.get_text()
-
-
-tree_group_icon = "folder"
-tree_array_icon = "table"
-
-
-def tree_get_icon(stype: str) -> str:
-    if stype == "Array":
-        return tree_array_icon
-    elif stype == "Group":
-        return tree_group_icon
-    else:
-        raise ValueError(f"Unknown type: {stype}")
-
-
-def tree_widget_sublist(node, root=False, expand=False):
-    import ipytree
-
-    result = ipytree.Node()
-    result.icon = tree_get_icon(node.get_type())
-    if root or (expand is True) or (isinstance(expand, int) and node.depth < expand):
-        result.opened = True
-    else:
-        result.opened = False
-    result.name = node.get_text()
-    result.nodes = [tree_widget_sublist(c, expand=expand) for c in node.get_children()]
-    result.disabled = True
-
-    return result
-
-
-def tree_widget(group, expand, level):
-    try:
-        import ipytree
-    except ImportError as e:
-        raise ImportError(
-            f"{e}: Run `pip install zarr[jupyter]` or `conda install ipytree`"
-            f"to get the required ipytree dependency for displaying the tree "
-            f"widget. If using jupyterlab<3, you also need to run "
-            f"`jupyter labextension install ipytree`"
-        ) from e
-
-    result = ipytree.Tree()
-    root = TreeNode(group, level=level)
-    result.add_node(tree_widget_sublist(root, root=True, expand=expand))
-
-    return result
-
-
-class TreeViewer:
-    def __init__(self, group, expand=False, level=None):
-        self.group = group
-        self.expand = expand
-        self.level = level
-
-        self.text_kwargs = dict(horiz_len=2, label_space=1, indent=1)
-
-        self.bytes_kwargs = dict(
-            UP_AND_RIGHT="+", HORIZONTAL="-", VERTICAL="|", VERTICAL_AND_RIGHT="+"
-        )
-
-        self.unicode_kwargs = dict(
-            UP_AND_RIGHT="\u2514",
-            HORIZONTAL="\u2500",
-            VERTICAL="\u2502",
-            VERTICAL_AND_RIGHT="\u251C",
-        )
-
-    def __bytes__(self):
-        drawer = LeftAligned(
-            traverse=TreeTraversal(), draw=BoxStyle(gfx=self.bytes_kwargs, **self.text_kwargs)
-        )
-        root = TreeNode(self.group, level=self.level)
-        result = drawer(root)
-
-        # Unicode characters slip in on Python 3.
-        # So we need to straighten that out first.
-        result = result.encode()
-
-        return result
-
-    def __unicode__(self):
-        drawer = LeftAligned(
-            traverse=TreeTraversal(), draw=BoxStyle(gfx=self.unicode_kwargs, **self.text_kwargs)
-        )
-        root = TreeNode(self.group, level=self.level)
-        return drawer(root)
-
-    def __repr__(self):
-        return self.__unicode__()
-
-    def _repr_mimebundle_(self, **kwargs):
-        tree = tree_widget(self.group, expand=self.expand, level=self.level)
-        return tree._repr_mimebundle_(**kwargs)
-
-
-def check_array_shape(param, array, shape):
-    if not hasattr(array, "shape"):
-        raise TypeError(f"parameter {param!r}: expected an array-like object, got {type(array)!r}")
-    if array.shape != shape:
-        raise ValueError(
-            f"parameter {param!r}: expected array with shape {shape!r}, got {array.shape!r}"
-        )
-
-
-def is_valid_python_name(name):
-    from keyword import iskeyword
-
-    return name.isidentifier() and not iskeyword(name)
-
-
-class NoLock:
-    """A lock that doesn't lock."""
-
-    def __enter__(self):
-        pass
-
-    def __exit__(self, *args):
-        pass
-
-
-nolock = NoLock()
-
-
-class PartialReadBuffer:
-    def __init__(self, store_key, chunk_store):
-        self.chunk_store = chunk_store
-        # is it fsstore or an actual fsspec map object
-        assert hasattr(self.chunk_store, "map")
-        self.map = self.chunk_store.map
-        self.fs = self.chunk_store.fs
-        self.store_key = store_key
-        self.buff = None
-        self.nblocks = None
-        self.start_points = None
-        self.n_per_block = None
-        self.start_points_max = None
-        self.read_blocks = set()
-
-        _key_path = self.map._key_to_str(store_key)
-        _key_path = _key_path.split("/")
-        _chunk_path = [self.chunk_store._normalize_key(_key_path[-1])]
-        _key_path = "/".join(_key_path[:-1] + _chunk_path)
-        self.key_path = _key_path
-
-    def prepare_chunk(self):
-        assert self.buff is None
-        header = self.fs.read_block(self.key_path, 0, 16)
-        nbytes, self.cbytes, blocksize = cbuffer_sizes(header)
-        typesize, _shuffle, _memcpyd = cbuffer_metainfo(header)
-        self.buff = mmap.mmap(-1, self.cbytes)
-        self.buff[0:16] = header
-        self.nblocks = nbytes / blocksize
-        self.nblocks = (
-            int(self.nblocks) if self.nblocks == int(self.nblocks) else int(self.nblocks + 1)
-        )
-        if self.nblocks == 1:
-            self.buff = self.read_full()
-            return
-        start_points_buffer = self.fs.read_block(self.key_path, 16, int(self.nblocks * 4))
-        self.start_points = np.frombuffer(start_points_buffer, count=self.nblocks, dtype=np.int32)
-        self.start_points_max = self.start_points.max()
-        self.buff[16 : (16 + (self.nblocks * 4))] = start_points_buffer
-        self.n_per_block = blocksize / typesize
-
-    def read_part(self, start, nitems):
-        assert self.buff is not None
-        if self.nblocks == 1:
-            return
-        start_block = int(start / self.n_per_block)
-        wanted_decompressed = 0
-        while wanted_decompressed < nitems:
-            if start_block not in self.read_blocks:
-                start_byte = self.start_points[start_block]
-                if start_byte == self.start_points_max:
-                    stop_byte = self.cbytes
-                else:
-                    stop_byte = self.start_points[self.start_points > start_byte].min()
-                length = stop_byte - start_byte
-                data_buff = self.fs.read_block(self.key_path, start_byte, length)
-                self.buff[start_byte:stop_byte] = data_buff
-                self.read_blocks.add(start_block)
-            if wanted_decompressed == 0:
-                wanted_decompressed += ((start_block + 1) * self.n_per_block) - start
-            else:
-                wanted_decompressed += self.n_per_block
-            start_block += 1
-
-    def read_full(self):
-        return self.chunk_store[self.store_key]
-
-
-class UncompressedPartialReadBufferV3:
-    def __init__(self, store_key, chunk_store, itemsize):
-        assert chunk_store.supports_efficient_get_partial_values
-        self.chunk_store = chunk_store
-        self.store_key = store_key
-        self.itemsize = itemsize
-
-    def prepare_chunk(self):
-        pass
-
-    def read_part(self, start, nitems):
-        return self.chunk_store.get_partial_values(
-            [(self.store_key, (start * self.itemsize, nitems * self.itemsize))]
-        )[0]
-
-    def read_full(self):
-        return self.chunk_store[self.store_key]
-
-
-def retry_call(
-    callabl: Callable,
-    args=None,
-    kwargs=None,
-    exceptions: Tuple[Any, ...] = (),
-    retries: int = 10,
-    wait: float = 0.1,
-) -> Any:
-    """
-    Make several attempts to invoke the callable. If one of the given exceptions
-    is raised, wait the given period of time and retry up to the given number of
-    retries.
-    """
-
-    if args is None:
-        args = ()
-    if kwargs is None:
-        kwargs = {}
-
-    for attempt in range(1, retries + 1):
-        try:
-            return callabl(*args, **kwargs)
-        except exceptions:
-            if attempt < retries:
-                time.sleep(wait)
-            else:
-                raise
-
-
-def all_equal(value: Any, array: Any):
-    """
-    Test if all the elements of an array are equivalent to a value.
-    If `value` is None, then this function does not do any comparison and
-    returns False.
-    """
-
-    if value is None:
-        return False
-    if not value:
-        # if `value` is falsey, then just 1 truthy value in `array`
-        # is sufficient to return False. We assume here that np.any is
-        # optimized to return on the first truthy value in `array`.
-        try:
-            return not np.any(array)
-        except (TypeError, ValueError):  # pragma: no cover
-            pass
-    if np.issubdtype(array.dtype, np.object_):
-        # we have to flatten the result of np.equal to handle outputs like
-        # [np.array([True,True]), True, True]
-        return all(flatten(np.equal(value, array, dtype=array.dtype)))
-    else:
-        # Numpy errors if you call np.isnan on custom dtypes, so ensure
-        # we are working with floats before calling isnan
-        if np.issubdtype(array.dtype, np.floating) and np.isnan(value):
-            return np.all(np.isnan(array))
-        else:
-            # using == raises warnings from numpy deprecated pattern, but
-            # using np.equal() raises type errors for structured dtypes...
-            return np.all(value == array)
-
-
-def ensure_contiguous_ndarray_or_bytes(buf) -> Union[NDArrayLike, bytes]:
-    """Convenience function to coerce `buf` to ndarray-like array or bytes.
-
-    First check if `buf` can be zero-copy converted to a contiguous array.
-    If not, `buf` will be copied to a newly allocated `bytes` object.
-
-    Parameters
-    ----------
-    buf : ndarray-like, array-like, or bytes-like
-        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
-        any object exporting a buffer interface.
-
-    Returns
-    -------
-    arr : NDArrayLike or bytes
-        A ndarray-like or bytes object
-    """
-
-    try:
-        return ensure_contiguous_ndarray_like(buf)
-    except TypeError:
-        # An error is raised if `buf` couldn't be zero-copy converted
-        return ensure_bytes(buf)
-
-
-class ConstantMap(Mapping[KeyType, ValueType]):
-    """A read-only map that maps all keys to the same constant value
-
-    Useful if you want to call `getitems()` with the same context for all keys.
-
-    Parameters
-    ----------
-    keys
-        The keys of the map. Will be copied to a frozenset if it isn't already.
-    constant
-        The constant that all keys are mapping to.
-    """
-
-    def __init__(self, keys: Iterable[KeyType], constant: ValueType) -> None:
-        self._keys = keys if isinstance(keys, frozenset) else frozenset(keys)
-        self._constant = constant
-
-    def __getitem__(self, key: KeyType) -> ValueType:
-        if key not in self._keys:
-            raise KeyError(repr(key))
-        return self._constant
-
-    def __iter__(self) -> Iterator[KeyType]:
-        return iter(self._keys)
-
-    def __len__(self) -> int:
-        return len(self._keys)
-
-    def __contains__(self, key: object) -> bool:
-        return key in self._keys
-
-    def __repr__(self) -> str:
-        return repr({k: v for k, v in self.items()})