@@ -751,6 +751,7 @@ def test_custom_defaults(self, icf_path):
751
751
schema = vcz .VcfZarrSchema (
752
752
format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
753
753
fields = [],
754
+ dimensions = {},
754
755
defaults = custom_defaults ,
755
756
)
756
757
@@ -761,6 +762,7 @@ def test_partial_defaults(self, icf_path):
761
762
schema1 = vcz .VcfZarrSchema (
762
763
format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
763
764
fields = [],
765
+ dimensions = {},
764
766
defaults = {"compressor" : {"id" : "blosc" , "cname" : "zlib" , "clevel" : 5 }},
765
767
)
766
768
assert schema1 .defaults ["compressor" ] == {
@@ -774,6 +776,7 @@ def test_partial_defaults(self, icf_path):
774
776
schema2 = vcz .VcfZarrSchema (
775
777
format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
776
778
fields = [],
779
+ dimensions = {},
777
780
defaults = {"filters" : [{"id" : "delta" }]},
778
781
)
779
782
assert (
@@ -819,27 +822,21 @@ def test_dimension_initialization(self):
819
822
assert dim1 .size == 100
820
823
assert dim1 .chunk_size == 20
821
824
822
- # Test with only size (chunk_size should default to size)
823
- dim2 = vcz .VcfZarrDimension ( size = 50 )
824
- assert dim2 .size == 50
825
- assert dim2 .chunk_size == 50
825
def test_unchunked(self):
    """``unchunked(50)`` gives a dimension with size 50 and chunk size 50."""
    whole = vcz.VcfZarrDimension.unchunked(50)
    assert (whole.size, whole.chunk_size) == (50, 50)
826
829
827
- def test_asdict (self ):
828
- # When chunk_size equals size, it shouldn't be included in dict
829
- dim1 = vcz . VcfZarrDimension ( size = 100 , chunk_size = 100 )
830
- assert dim1 . asdict () == { "size" : 100 }
830
def test_unchunked_zero_size(self):
    """``unchunked(0)`` keeps size 0 but is expected to report chunk size 1."""
    empty = vcz.VcfZarrDimension.unchunked(0)
    assert (empty.size, empty.chunk_size) == (0, 1)
831
834
832
- # When chunk_size differs from size, it should be included in dict
833
- dim2 = vcz .VcfZarrDimension (size = 100 , chunk_size = 20 )
834
- assert dim2 .asdict () == {"size" : 100 , "chunk_size" : 20 }
835
def test_asdict(self):
    """``asdict`` returns both ``size`` and ``chunk_size`` as given.

    The chunk size (101) is deliberately larger than the size (100).
    """
    expected = {"size": 100, "chunk_size": 101}
    dim = vcz.VcfZarrDimension(size=100, chunk_size=101)
    assert dim.asdict() == expected
835
838
836
839
def test_fromdict (self ):
837
- # With only size
838
- dim1 = vcz .VcfZarrDimension .fromdict ({"size" : 75 })
839
- assert dim1 .size == 75
840
- assert dim1 .chunk_size == 75
841
-
842
- # With both size and chunk_size
843
840
dim2 = vcz .VcfZarrDimension .fromdict ({"size" : 75 , "chunk_size" : 25 })
844
841
assert dim2 .size == 75
845
842
assert dim2 .chunk_size == 25
@@ -898,6 +895,98 @@ def test_max_number_exceeds_dimension_size(
898
895
vcz .ZarrArraySpec .from_field (vcf_field , schema )
899
896
900
897
898
class TestStandardDimensions:
    """Tests for the dimension mapping returned by ``vcz.standard_dimensions``."""

    @pytest.mark.parametrize(
        ("size", "chunk_size", "expected_chunk_size"),
        [
            (0, None, 1),
            (0, 100, 100),
            (1, 1, 1),
            (1, None, 1),
            (1, 10, 10),
            (1_001, None, 1_000),
            (10**9, None, 1_000),
            (999, None, 999),
            (1, 100_000, 100_000),
        ],
    )
    def test_variants(self, size, chunk_size, expected_chunk_size):
        """Check the chunk size resolved for the ``variants`` dimension.

        The cases expect an explicit chunk size to be used as given (even
        when it exceeds the dimension size), and ``None`` to resolve to the
        dimension size capped at 1_000 and never below 1.
        """
        dims = vcz.standard_dimensions(
            variants_size=size, variants_chunk_size=chunk_size, samples_size=0
        )
        assert dims["variants"] == vcz.VcfZarrDimension(size, expected_chunk_size)

    @pytest.mark.parametrize(
        ("size", "chunk_size", "expected_chunk_size"),
        [
            (0, None, 1),
            (0, 100, 100),
            (1, 1, 1),
            (1, None, 1),
            (1, 10, 10),
            (10_001, None, 10_000),
            (10**9, None, 10_000),
            (9_999, None, 9_999),
            (1, 100_000, 100_000),
        ],
    )
    def test_samples(self, size, chunk_size, expected_chunk_size):
        """Check the chunk size resolved for the ``samples`` dimension.

        Same shape as ``test_variants``, but the cases expect ``None`` to
        resolve to the dimension size capped at 10_000 and never below 1.
        """
        dims = vcz.standard_dimensions(
            variants_size=0, samples_size=size, samples_chunk_size=chunk_size
        )
        assert dims["samples"] == vcz.VcfZarrDimension(size, expected_chunk_size)

    @pytest.mark.parametrize(
        ("kwargs", "expected"),
        [
            (
                {"variants_size": 1, "samples_size": 1, "alleles_size": 2},
                {
                    "variants": {"size": 1, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 2, "chunk_size": 2},
                    "alt_alleles": {"size": 1, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "alleles_size": 1},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 1, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "alleles_size": 0},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 0, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "filters_size": 2},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "filters": {"size": 2, "chunk_size": 2},
                },
            ),
        ],
    )
    def test_examples(self, kwargs, expected):
        """Compare the full returned mapping, serialised via ``asdict``.

        Comparing whole dicts also pins which dimension names are present:
        ``alt_alleles`` is only expected in the ``alleles_size=2`` case.
        """
        dims = {k: v.asdict() for k, v in vcz.standard_dimensions(**kwargs).items()}
        assert dims == expected

    @pytest.mark.parametrize("field", ["ploidy", "genotypes"])
    @pytest.mark.parametrize("size", [0, 1, 2])
    def test_simple_fields(self, field, size):
        """Check ``ploidy``/``genotypes`` dimensions built from ``<field>_size``.

        The expected chunk size equals the size, except that an empty
        dimension is expected to report a chunk size of 1.
        """
        # The keyword must be exactly "<field>_size" (no embedded whitespace),
        # matching the other *_size arguments passed to standard_dimensions.
        dims = vcz.standard_dimensions(
            samples_size=1, variants_size=1, **{f"{field}_size": size}
        )
        assert dims[field].asdict() == {"size": size, "chunk_size": max(1, size)}
988
+
989
+
901
990
def test_create_index_errors (tmp_path ):
902
991
root = zarr .open (tmp_path )
903
992
root ["foobar" ] = np .array ([1 , 2 , 3 ])
0 commit comments