1313import numpy as np
1414import zarr
1515
16+ from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS
17+
1618from .. import constants , core , provenance
1719from . import icf
1820
@@ -532,8 +534,7 @@ def init(
532534 )
533535
534536 self .path .mkdir ()
535- store = zarr .DirectoryStore (self .path )
536- root = zarr .group (store = store )
537+ root = zarr .open (store = self .path , mode = "a" , ** ZARR_FORMAT_KWARGS )
537538 root .attrs .update (
538539 {
539540 "vcf_zarr_version" : "0.2" ,
@@ -549,8 +550,7 @@ def init(
549550 self .wip_path .mkdir ()
550551 self .arrays_path .mkdir ()
551552 self .partitions_path .mkdir ()
552- store = zarr .DirectoryStore (self .arrays_path )
553- root = zarr .group (store = store )
553+ root = zarr .open (store = self .arrays_path , mode = "a" , ** ZARR_FORMAT_KWARGS )
554554
555555 total_chunks = 0
556556 for field in self .schema .fields :
@@ -574,7 +574,8 @@ def encode_samples(self, root):
574574 raise ValueError ("Subsetting or reordering samples not supported currently" )
575575 array = root .array (
576576 "sample_id" ,
577- [sample .id for sample in self .schema .samples ],
577+ data = [sample .id for sample in self .schema .samples ],
578+ shape = len (self .schema .samples ),
578579 dtype = "str" ,
579580 compressor = DEFAULT_ZARR_COMPRESSOR ,
580581 chunks = (self .schema .samples_chunk_size ,),
@@ -585,15 +586,17 @@ def encode_samples(self, root):
585586 def encode_contig_id (self , root ):
586587 array = root .array (
587588 "contig_id" ,
588- [contig .id for contig in self .schema .contigs ],
589+ data = [contig .id for contig in self .schema .contigs ],
590+ shape = len (self .schema .contigs ),
589591 dtype = "str" ,
590592 compressor = DEFAULT_ZARR_COMPRESSOR ,
591593 )
592594 array .attrs ["_ARRAY_DIMENSIONS" ] = ["contigs" ]
593595 if all (contig .length is not None for contig in self .schema .contigs ):
594596 array = root .array (
595597 "contig_length" ,
596- [contig .length for contig in self .schema .contigs ],
598+ data = [contig .length for contig in self .schema .contigs ],
599+ shape = len (self .schema .contigs ),
597600 dtype = np .int64 ,
598601 compressor = DEFAULT_ZARR_COMPRESSOR ,
599602 )
@@ -604,7 +607,8 @@ def encode_filter_id(self, root):
604607 # https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
605608 array = root .array (
606609 "filter_id" ,
607- [filt .id for filt in self .schema .filters ],
610+ data = [filt .id for filt in self .schema .filters ],
611+ shape = len (self .schema .filters ),
608612 dtype = "str" ,
609613 compressor = DEFAULT_ZARR_COMPRESSOR ,
610614 )
@@ -618,14 +622,15 @@ def init_array(self, root, array_spec, variants_dim_size):
618622 # Truncate the variants dimension if max_variant_chunks was specified
619623 shape [0 ] = variants_dim_size
620624 a = root .empty (
621- array_spec .name ,
625+ name = array_spec .name ,
622626 shape = shape ,
623627 chunks = array_spec .chunks ,
624628 dtype = array_spec .dtype ,
625629 compressor = numcodecs .get_codec (array_spec .compressor ),
626630 filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ],
627631 object_codec = object_codec ,
628632 dimension_separator = self .metadata .dimension_separator ,
633+ ** ZARR_FORMAT_KWARGS ,
629634 )
630635 a .attrs .update (
631636 {
@@ -690,9 +695,7 @@ def init_partition_array(self, partition_index, name):
690695 # Overwrite any existing WIP files
691696 wip_path = self .wip_partition_array_path (partition_index , name )
692697 shutil .copytree (src , wip_path , dirs_exist_ok = True )
693- store = zarr .DirectoryStore (self .wip_partition_path (partition_index ))
694- wip_root = zarr .group (store = store )
695- array = wip_root [name ]
698+ array = zarr .open_array (store = wip_path , mode = "a" )
696699 logger .debug (f"Opened empty array { array .name } <{ array .dtype } > @ { wip_path } " )
697700 return array
698701
@@ -909,8 +912,7 @@ def finalise(self, show_progress=False):
909912 def create_index (self ):
910913 """Create an index to support efficient region queries."""
911914
912- store = zarr .DirectoryStore (self .path )
913- root = zarr .open_group (store = store , mode = "r+" )
915+ root = zarr .open_group (store = self .path , mode = "r+" )
914916
915917 contig = root ["variant_contig" ]
916918 pos = root ["variant_position" ]
0 commit comments