@@ -751,6 +751,7 @@ def test_custom_defaults(self, icf_path):
751751 schema = vcz .VcfZarrSchema (
752752 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
753753 fields = [],
754+ dimensions = {},
754755 defaults = custom_defaults ,
755756 )
756757
@@ -761,6 +762,7 @@ def test_partial_defaults(self, icf_path):
761762 schema1 = vcz .VcfZarrSchema (
762763 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
763764 fields = [],
765+ dimensions = {},
764766 defaults = {"compressor" : {"id" : "blosc" , "cname" : "zlib" , "clevel" : 5 }},
765767 )
766768 assert schema1 .defaults ["compressor" ] == {
@@ -774,6 +776,7 @@ def test_partial_defaults(self, icf_path):
774776 schema2 = vcz .VcfZarrSchema (
775777 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
776778 fields = [],
779+ dimensions = {},
777780 defaults = {"filters" : [{"id" : "delta" }]},
778781 )
779782 assert (
@@ -819,27 +822,21 @@ def test_dimension_initialization(self):
819822 assert dim1 .size == 100
820823 assert dim1 .chunk_size == 20
821824
822- # Test with only size (chunk_size should default to size)
823- dim2 = vcz .VcfZarrDimension ( size = 50 )
824- assert dim2 .size == 50
825- assert dim2 .chunk_size == 50
def test_unchunked(self):
    """An unchunked dimension of size 50 is expected to use one 50-wide chunk."""
    unchunked_dim = vcz.VcfZarrDimension.unchunked(50)
    observed = (unchunked_dim.size, unchunked_dim.chunk_size)
    assert observed == (50, 50)
826829
827- def test_asdict (self ):
828- # When chunk_size equals size, it shouldn't be included in dict
829- dim1 = vcz . VcfZarrDimension ( size = 100 , chunk_size = 100 )
830- assert dim1 . asdict () == { "size" : 100 }
def test_unchunked_zero_size(self):
    """An empty unchunked dimension is expected to report a chunk size of 1."""
    empty_dim = vcz.VcfZarrDimension.unchunked(0)
    observed = (empty_dim.size, empty_dim.chunk_size)
    # The expected chunk size here is 1, not 0, even though the size is 0.
    assert observed == (0, 1)
831834
832- # When chunk_size differs from size, it should be included in dict
833- dim2 = vcz .VcfZarrDimension (size = 100 , chunk_size = 20 )
834- assert dim2 .asdict () == {"size" : 100 , "chunk_size" : 20 }
def test_asdict(self):
    """asdict() is expected to report both the size and the chunk size."""
    expected = {"size": 100, "chunk_size": 101}
    dimension = vcz.VcfZarrDimension(size=100, chunk_size=101)
    assert dimension.asdict() == expected
835838
836839 def test_fromdict (self ):
837- # With only size
838- dim1 = vcz .VcfZarrDimension .fromdict ({"size" : 75 })
839- assert dim1 .size == 75
840- assert dim1 .chunk_size == 75
841-
842- # With both size and chunk_size
843840 dim2 = vcz .VcfZarrDimension .fromdict ({"size" : 75 , "chunk_size" : 25 })
844841 assert dim2 .size == 75
845842 assert dim2 .chunk_size == 25
@@ -898,6 +895,98 @@ def test_max_number_exceeds_dimension_size(
898895 vcz .ZarrArraySpec .from_field (vcf_field , schema )
899896
900897
class TestStandardDimensions:
    """Tests for the dimension mapping returned by ``vcz.standard_dimensions``."""

    @pytest.mark.parametrize(
        ("size", "chunk_size", "expected_chunk_size"),
        [
            (0, None, 1),
            (0, 100, 100),
            (1, 1, 1),
            (1, None, 1),
            (1, 10, 10),
            (1_001, None, 1_000),
            (10**9, None, 1_000),
            (999, None, 999),
            (1, 100_000, 100_000),
        ],
    )
    def test_variants(self, size, chunk_size, expected_chunk_size):
        """Check the chunk size chosen for the "variants" dimension.

        The cases expect an explicit ``variants_chunk_size`` to be kept as
        given (even when larger than the size), and a ``None`` chunk size to
        resolve to the size itself, never below 1 and never above 1,000.
        """
        dims = vcz.standard_dimensions(
            variants_size=size, variants_chunk_size=chunk_size, samples_size=0
        )
        assert dims["variants"] == vcz.VcfZarrDimension(size, expected_chunk_size)

    @pytest.mark.parametrize(
        ("size", "chunk_size", "expected_chunk_size"),
        [
            (0, None, 1),
            (0, 100, 100),
            (1, 1, 1),
            (1, None, 1),
            (1, 10, 10),
            (10_001, None, 10_000),
            (10**9, None, 10_000),
            (9_999, None, 9_999),
            (1, 100_000, 100_000),
        ],
    )
    def test_samples(self, size, chunk_size, expected_chunk_size):
        """Check the chunk size chosen for the "samples" dimension.

        Same shape as ``test_variants``, except that the cases expect a
        ``None`` chunk size to be capped at 10,000 rather than 1,000.
        """
        dims = vcz.standard_dimensions(
            variants_size=0, samples_size=size, samples_chunk_size=chunk_size
        )
        assert dims["samples"] == vcz.VcfZarrDimension(size, expected_chunk_size)

    @pytest.mark.parametrize(
        ("kwargs", "expected"),
        [
            (
                {"variants_size": 1, "samples_size": 1, "alleles_size": 2},
                {
                    "variants": {"size": 1, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 2, "chunk_size": 2},
                    "alt_alleles": {"size": 1, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "alleles_size": 1},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 1, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "alleles_size": 0},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "alleles": {"size": 0, "chunk_size": 1},
                },
            ),
            (
                {"variants_size": 0, "samples_size": 1, "filters_size": 2},
                {
                    "variants": {"size": 0, "chunk_size": 1},
                    "samples": {"size": 1, "chunk_size": 1},
                    "filters": {"size": 2, "chunk_size": 2},
                },
            ),
        ],
    )
    def test_examples(self, kwargs, expected):
        """Compare the complete dimension mapping against worked examples.

        The comparison is on the whole dict, so the examples also pin which
        keys are present: only the ``alleles_size=2`` case expects an
        "alt_alleles" entry, and no case expects an entry for a size that
        was not passed in.
        """
        dims = {k: v.asdict() for k, v in vcz.standard_dimensions(**kwargs).items()}
        assert dims == expected

    @pytest.mark.parametrize("field", ["ploidy", "genotypes"])
    @pytest.mark.parametrize("size", [0, 1, 2])
    def test_simple_fields(self, field, size):
        """Check dimensions that are configured through ``<field>_size`` alone.

        The expected chunk size equals the size, except that a size of 0 is
        expected to give a chunk size of 1.
        """
        # The keyword must be exactly "<field>_size" (e.g. "ploidy_size");
        # any whitespace inside the name would produce a keyword that does
        # not match the *_size arguments used by the other tests.
        size_kwarg = {f"{field}_size": size}
        dims = vcz.standard_dimensions(samples_size=1, variants_size=1, **size_kwarg)
        assert dims[field].asdict() == {"size": size, "chunk_size": max(1, size)}
988+
989+
901990def test_create_index_errors (tmp_path ):
902991 root = zarr .open (tmp_path )
903992 root ["foobar" ] = np .array ([1 , 2 , 3 ])
0 commit comments