13
13
import numpy as np
14
14
import zarr
15
15
16
+ from bio2zarr .zarr_utils import ZARR_FORMAT_KWARGS
17
+
16
18
from .. import constants , core , provenance
17
19
from . import icf
18
20
@@ -532,8 +534,7 @@ def init(
532
534
)
533
535
534
536
self .path .mkdir ()
535
- store = zarr .DirectoryStore (self .path )
536
- root = zarr .group (store = store )
537
+ root = zarr .open (store = self .path , mode = "a" , ** ZARR_FORMAT_KWARGS )
537
538
root .attrs .update (
538
539
{
539
540
"vcf_zarr_version" : "0.2" ,
@@ -549,8 +550,7 @@ def init(
549
550
self .wip_path .mkdir ()
550
551
self .arrays_path .mkdir ()
551
552
self .partitions_path .mkdir ()
552
- store = zarr .DirectoryStore (self .arrays_path )
553
- root = zarr .group (store = store )
553
+ root = zarr .open (store = self .arrays_path , mode = "a" , ** ZARR_FORMAT_KWARGS )
554
554
555
555
total_chunks = 0
556
556
for field in self .schema .fields :
@@ -574,7 +574,8 @@ def encode_samples(self, root):
574
574
raise ValueError ("Subsetting or reordering samples not supported currently" )
575
575
array = root .array (
576
576
"sample_id" ,
577
- [sample .id for sample in self .schema .samples ],
577
+ data = [sample .id for sample in self .schema .samples ],
578
+ shape = len (self .schema .samples ),
578
579
dtype = "str" ,
579
580
compressor = DEFAULT_ZARR_COMPRESSOR ,
580
581
chunks = (self .schema .samples_chunk_size ,),
@@ -585,15 +586,17 @@ def encode_samples(self, root):
585
586
def encode_contig_id (self , root ):
586
587
array = root .array (
587
588
"contig_id" ,
588
- [contig .id for contig in self .schema .contigs ],
589
+ data = [contig .id for contig in self .schema .contigs ],
590
+ shape = len (self .schema .contigs ),
589
591
dtype = "str" ,
590
592
compressor = DEFAULT_ZARR_COMPRESSOR ,
591
593
)
592
594
array .attrs ["_ARRAY_DIMENSIONS" ] = ["contigs" ]
593
595
if all (contig .length is not None for contig in self .schema .contigs ):
594
596
array = root .array (
595
597
"contig_length" ,
596
- [contig .length for contig in self .schema .contigs ],
598
+ data = [contig .length for contig in self .schema .contigs ],
599
+ shape = len (self .schema .contigs ),
597
600
dtype = np .int64 ,
598
601
compressor = DEFAULT_ZARR_COMPRESSOR ,
599
602
)
@@ -604,7 +607,8 @@ def encode_filter_id(self, root):
604
607
# https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
605
608
array = root .array (
606
609
"filter_id" ,
607
- [filt .id for filt in self .schema .filters ],
610
+ data = [filt .id for filt in self .schema .filters ],
611
+ shape = len (self .schema .filters ),
608
612
dtype = "str" ,
609
613
compressor = DEFAULT_ZARR_COMPRESSOR ,
610
614
)
@@ -618,14 +622,15 @@ def init_array(self, root, array_spec, variants_dim_size):
618
622
# Truncate the variants dimension if max_variant_chunks was specified
619
623
shape [0 ] = variants_dim_size
620
624
a = root .empty (
621
- array_spec .name ,
625
+ name = array_spec .name ,
622
626
shape = shape ,
623
627
chunks = array_spec .chunks ,
624
628
dtype = array_spec .dtype ,
625
629
compressor = numcodecs .get_codec (array_spec .compressor ),
626
630
filters = [numcodecs .get_codec (filt ) for filt in array_spec .filters ],
627
631
object_codec = object_codec ,
628
632
dimension_separator = self .metadata .dimension_separator ,
633
+ ** ZARR_FORMAT_KWARGS ,
629
634
)
630
635
a .attrs .update (
631
636
{
@@ -690,9 +695,7 @@ def init_partition_array(self, partition_index, name):
690
695
# Overwrite any existing WIP files
691
696
wip_path = self .wip_partition_array_path (partition_index , name )
692
697
shutil .copytree (src , wip_path , dirs_exist_ok = True )
693
- store = zarr .DirectoryStore (self .wip_partition_path (partition_index ))
694
- wip_root = zarr .group (store = store )
695
- array = wip_root [name ]
698
+ array = zarr .open_array (store = wip_path , mode = "a" )
696
699
logger .debug (f"Opened empty array { array .name } <{ array .dtype } > @ { wip_path } " )
697
700
return array
698
701
@@ -909,8 +912,7 @@ def finalise(self, show_progress=False):
909
912
def create_index (self ):
910
913
"""Create an index to support efficient region queries."""
911
914
912
- store = zarr .DirectoryStore (self .path )
913
- root = zarr .open_group (store = store , mode = "r+" )
915
+ root = zarr .open_group (store = self .path , mode = "r+" )
914
916
915
917
contig = root ["variant_contig" ]
916
918
pos = root ["variant_position" ]
0 commit comments