13
13
import numpy as np
14
14
import zarr
15
15
16
- from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS
16
+ from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS , zarr_v3
17
17
18
18
from .. import constants , core , provenance
19
19
from . import icf
@@ -571,7 +571,7 @@ def init(
571
571
def encode_samples (self , root ):
572
572
if self .schema .samples != self .icf .metadata .samples :
573
573
raise ValueError ("Subsetting or reordering samples not supported currently" )
574
- array = root .array (
574
+ array = root .create_dataset (
575
575
"sample_id" ,
576
576
data = [sample .id for sample in self .schema .samples ],
577
577
shape = len (self .schema .samples ),
@@ -583,7 +583,7 @@ def encode_samples(self, root):
583
583
logger .debug ("Samples done" )
584
584
585
585
def encode_contig_id (self , root ):
586
- array = root .array (
586
+ array = root .create_dataset (
587
587
"contig_id" ,
588
588
data = [contig .id for contig in self .schema .contigs ],
589
589
shape = len (self .schema .contigs ),
@@ -592,7 +592,7 @@ def encode_contig_id(self, root):
592
592
)
593
593
array .attrs ["_ARRAY_DIMENSIONS" ] = ["contigs" ]
594
594
if all (contig .length is not None for contig in self .schema .contigs ):
595
- array = root .array (
595
+ array = root .create_dataset (
596
596
"contig_length" ,
597
597
data = [contig .length for contig in self .schema .contigs ],
598
598
shape = len (self .schema .contigs ),
@@ -604,7 +604,7 @@ def encode_contig_id(self, root):
604
604
def encode_filter_id (self , root ):
605
605
# TODO need a way to store description also
606
606
# https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
607
- array = root .array (
607
+ array = root .create_dataset (
608
608
"filter_id" ,
609
609
data = [filt .id for filt in self .schema .filters ],
610
610
shape = len (self .schema .filters ),
@@ -614,9 +614,17 @@ def encode_filter_id(self, root):
614
614
array .attrs ["_ARRAY_DIMENSIONS" ] = ["filters" ]
615
615
616
616
def init_array (self , root , array_spec , variants_dim_size ):
617
- object_codec = None
617
+ kwargs = dict (ZARR_FORMAT_KWARGS )
618
+ filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ]
618
619
if array_spec .dtype == "O" :
619
- object_codec = numcodecs .VLenUTF8 ()
620
+ if zarr_v3 ():
621
+ filters = [* list (filters ), numcodecs .VLenUTF8 ()]
622
+ else :
623
+ kwargs ["object_codec" ] = numcodecs .VLenUTF8 ()
624
+
625
+ if not zarr_v3 ():
626
+ kwargs ["dimension_separator" ] = self .metadata .dimension_separator
627
+
620
628
shape = list (array_spec .shape )
621
629
# Truncate the variants dimension if max_variant_chunks was specified
622
630
shape [0 ] = variants_dim_size
@@ -626,10 +634,8 @@ def init_array(self, root, array_spec, variants_dim_size):
626
634
chunks = array_spec .chunks ,
627
635
dtype = array_spec .dtype ,
628
636
compressor = numcodecs .get_codec (array_spec .compressor ),
629
- filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ],
630
- object_codec = object_codec ,
631
- dimension_separator = self .metadata .dimension_separator ,
632
- ** ZARR_FORMAT_KWARGS ,
637
+ filters = filters ,
638
+ ** kwargs ,
633
639
)
634
640
a .attrs .update (
635
641
{
@@ -945,13 +951,16 @@ def create_index(self):
945
951
c_start_idx = c_end_idx + 1
946
952
947
953
index = np .array (index , dtype = np .int32 )
948
- array = root .array (
954
+ kwargs = {}
955
+ if not zarr_v3 ():
956
+ kwargs ["dimension_separator" ] = self .metadata .dimension_separator
957
+ array = root .create_dataset (
949
958
"region_index" ,
950
959
data = index ,
951
960
shape = index .shape ,
952
961
dtype = index .dtype ,
953
962
compressor = numcodecs .Blosc ("zstd" , clevel = 9 , shuffle = 0 ),
954
- dimension_separator = self . metadata . dimension_separator ,
963
+ ** kwargs ,
955
964
)
956
965
array .attrs ["_ARRAY_DIMENSIONS" ] = [
957
966
"region_index_values" ,
0 commit comments