13
13
import numpy as np
14
14
import zarr
15
15
16
- from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS
16
+ from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS , zarr_v3
17
17
18
18
from .. import constants , core , provenance
19
19
from . import icf
@@ -572,7 +572,7 @@ def init(
572
572
def encode_samples (self , root ):
573
573
if self .schema .samples != self .icf .metadata .samples :
574
574
raise ValueError ("Subsetting or reordering samples not supported currently" )
575
- array = root .array (
575
+ array = root .create_dataset (
576
576
"sample_id" ,
577
577
data = [sample .id for sample in self .schema .samples ],
578
578
shape = len (self .schema .samples ),
@@ -584,7 +584,7 @@ def encode_samples(self, root):
584
584
logger .debug ("Samples done" )
585
585
586
586
def encode_contig_id (self , root ):
587
- array = root .array (
587
+ array = root .create_dataset (
588
588
"contig_id" ,
589
589
data = [contig .id for contig in self .schema .contigs ],
590
590
shape = len (self .schema .contigs ),
@@ -593,7 +593,7 @@ def encode_contig_id(self, root):
593
593
)
594
594
array .attrs ["_ARRAY_DIMENSIONS" ] = ["contigs" ]
595
595
if all (contig .length is not None for contig in self .schema .contigs ):
596
- array = root .array (
596
+ array = root .create_dataset (
597
597
"contig_length" ,
598
598
data = [contig .length for contig in self .schema .contigs ],
599
599
shape = len (self .schema .contigs ),
@@ -605,7 +605,7 @@ def encode_contig_id(self, root):
605
605
def encode_filter_id (self , root ):
606
606
# TODO need a way to store description also
607
607
# https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
608
- array = root .array (
608
+ array = root .create_dataset (
609
609
"filter_id" ,
610
610
data = [filt .id for filt in self .schema .filters ],
611
611
shape = len (self .schema .filters ),
@@ -615,9 +615,17 @@ def encode_filter_id(self, root):
615
615
array .attrs ["_ARRAY_DIMENSIONS" ] = ["filters" ]
616
616
617
617
def init_array (self , root , array_spec , variants_dim_size ):
618
- object_codec = None
618
+ kwargs = dict (ZARR_FORMAT_KWARGS )
619
+ filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ]
619
620
if array_spec .dtype == "O" :
620
- object_codec = numcodecs .VLenUTF8 ()
621
+ if zarr_v3 ():
622
+ filters = [* list (filters ), numcodecs .VLenUTF8 ()]
623
+ else :
624
+ kwargs ["object_codec" ] = numcodecs .VLenUTF8 ()
625
+
626
+ if not zarr_v3 ():
627
+ kwargs ["dimension_separator" ] = self .metadata .dimension_separator
628
+
621
629
shape = list (array_spec .shape )
622
630
# Truncate the variants dimension if max_variant_chunks was specified
623
631
shape [0 ] = variants_dim_size
@@ -627,10 +635,8 @@ def init_array(self, root, array_spec, variants_dim_size):
627
635
chunks = array_spec .chunks ,
628
636
dtype = array_spec .dtype ,
629
637
compressor = numcodecs .get_codec (array_spec .compressor ),
630
- filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ],
631
- object_codec = object_codec ,
632
- dimension_separator = self .metadata .dimension_separator ,
633
- ** ZARR_FORMAT_KWARGS ,
638
+ filters = filters ,
639
+ ** kwargs ,
634
640
)
635
641
a .attrs .update (
636
642
{
@@ -946,13 +952,16 @@ def create_index(self):
946
952
c_start_idx = c_end_idx + 1
947
953
948
954
index = np .array (index , dtype = np .int32 )
949
- array = root .array (
955
+ kwargs = {}
956
+ if not zarr_v3 ():
957
+ kwargs ["dimension_separator" ] = self .metadata .dimension_separator
958
+ array = root .create_dataset (
950
959
"region_index" ,
951
960
data = index ,
952
961
shape = index .shape ,
953
962
dtype = index .dtype ,
954
963
compressor = numcodecs .Blosc ("zstd" , clevel = 9 , shuffle = 0 ),
955
- dimension_separator = self . metadata . dimension_separator ,
964
+ ** kwargs ,
956
965
)
957
966
array .attrs ["_ARRAY_DIMENSIONS" ] = [
958
967
"region_index_values" ,
0 commit comments