Skip to content

Commit 8d9ec10

Browse files
committed
Changes to run on Zarr v3 (but always using v2 for Zarr format)
1 parent 0a95188 commit 8d9ec10

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

bio2zarr/vcf2zarr/vcz.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import numcodecs
1313
import numpy as np
1414
import zarr
15+
from packaging.version import Version
1516

1617
from .. import constants, core, provenance
1718
from . import icf
@@ -470,6 +471,17 @@ class VcfZarrWriteSummary(core.JsonDataclass):
470471
max_encoding_memory: str
471472

472473

474+
def _zarr_v3() -> bool:
    """Report whether the installed zarr-python is major version 3 or newer."""
    installed = Version(zarr.__version__)
    return installed.major >= 3
476+
477+
478+
# Request Zarr format v2 explicitly when running under zarr-python v3;
# for earlier releases no extra keyword arguments are passed.
ZARR_FORMAT_KWARGS = {"zarr_format": 2} if _zarr_v3() else {}
483+
484+
473485
class VcfZarrWriter:
474486
def __init__(self, path):
475487
self.path = pathlib.Path(path)
@@ -532,7 +544,7 @@ def init(
532544
)
533545

534546
self.path.mkdir()
535-
root = zarr.open(store=self.path, mode="a")
547+
root = zarr.open(store=self.path, mode="a", **ZARR_FORMAT_KWARGS)
536548
root.attrs.update(
537549
{
538550
"vcf_zarr_version": "0.2",
@@ -548,7 +560,7 @@ def init(
548560
self.wip_path.mkdir()
549561
self.arrays_path.mkdir()
550562
self.partitions_path.mkdir()
551-
root = zarr.open(store=self.arrays_path, mode="a")
563+
root = zarr.open(store=self.arrays_path, mode="a", **ZARR_FORMAT_KWARGS)
552564

553565
total_chunks = 0
554566
for field in self.schema.fields:
@@ -572,7 +584,8 @@ def encode_samples(self, root):
572584
raise ValueError("Subsetting or reordering samples not supported currently")
573585
array = root.array(
574586
"sample_id",
575-
[sample.id for sample in self.schema.samples],
587+
data=[sample.id for sample in self.schema.samples],
588+
shape=len(self.schema.samples),
576589
dtype="str",
577590
compressor=DEFAULT_ZARR_COMPRESSOR,
578591
chunks=(self.schema.samples_chunk_size,),
@@ -583,15 +596,17 @@ def encode_samples(self, root):
583596
def encode_contig_id(self, root):
584597
array = root.array(
585598
"contig_id",
586-
[contig.id for contig in self.schema.contigs],
599+
data=[contig.id for contig in self.schema.contigs],
600+
shape=len(self.schema.contigs),
587601
dtype="str",
588602
compressor=DEFAULT_ZARR_COMPRESSOR,
589603
)
590604
array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"]
591605
if all(contig.length is not None for contig in self.schema.contigs):
592606
array = root.array(
593607
"contig_length",
594-
[contig.length for contig in self.schema.contigs],
608+
data=[contig.length for contig in self.schema.contigs],
609+
shape=len(self.schema.contigs),
595610
dtype=np.int64,
596611
compressor=DEFAULT_ZARR_COMPRESSOR,
597612
)
@@ -602,7 +617,8 @@ def encode_filter_id(self, root):
602617
# https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
603618
array = root.array(
604619
"filter_id",
605-
[filt.id for filt in self.schema.filters],
620+
data=[filt.id for filt in self.schema.filters],
621+
shape=len(self.schema.filters),
606622
dtype="str",
607623
compressor=DEFAULT_ZARR_COMPRESSOR,
608624
)
@@ -616,14 +632,15 @@ def init_array(self, root, array_spec, variants_dim_size):
616632
# Truncate the variants dimension if max_variant_chunks was specified
617633
shape[0] = variants_dim_size
618634
a = root.empty(
619-
array_spec.name,
635+
name=array_spec.name,
620636
shape=shape,
621637
chunks=array_spec.chunks,
622638
dtype=array_spec.dtype,
623639
compressor=numcodecs.get_codec(array_spec.compressor),
624640
filters=[numcodecs.get_codec(filt) for filt in array_spec.filters],
625641
object_codec=object_codec,
626642
dimension_separator=self.metadata.dimension_separator,
643+
**ZARR_FORMAT_KWARGS,
627644
)
628645
a.attrs.update(
629646
{
@@ -688,7 +705,11 @@ def init_partition_array(self, partition_index, name):
688705
# Overwrite any existing WIP files
689706
wip_path = self.wip_partition_array_path(partition_index, name)
690707
shutil.copytree(src, wip_path, dirs_exist_ok=True)
691-
wip_root = zarr.open(store=self.wip_partition_path(partition_index), mode="a")
708+
wip_root = zarr.open(
709+
store=self.wip_partition_path(partition_index),
710+
mode="a",
711+
**ZARR_FORMAT_KWARGS,
712+
)
692713
array = wip_root[name]
693714
logger.debug(f"Opened empty array {array.name} <{array.dtype}> @ {wip_path}")
694715
return array

0 commit comments

Comments
 (0)