diff --git a/docs/examples/1kg.schema.json b/docs/examples/1kg.schema.json index 808185f2f..a7fd1e54a 100644 --- a/docs/examples/1kg.schema.json +++ b/docs/examples/1kg.schema.json @@ -1,1246 +1,55 @@ { - "format_version": "0.4", - "samples_chunk_size": 1000, - "variants_chunk_size": 10000, - "samples": [ - { - "id": "HG00096" - }, - { - "id": "HG00099" - }, - { - "id": "HG00105" - }, - { - "id": "HG00118" - }, - { - "id": "HG00129" - }, - { - "id": "HG00148" - }, - { - "id": "HG00177" - }, - { - "id": "HG00182" - }, - { - "id": "HG00242" - }, - { - "id": "HG00254" - }, - { - "id": "HG00265" - }, - { - "id": "HG00271" - }, - { - "id": "HG00274" - }, - { - "id": "HG00332" - }, - { - "id": "HG00335" - }, - { - "id": "HG00369" - }, - { - "id": "HG00421" - }, - { - "id": "HG00436" - }, - { - "id": "HG00452" - }, - { - "id": "HG00472" - }, - { - "id": "HG00530" - }, - { - "id": "HG00534" - }, - { - "id": "HG00583" - }, - { - "id": "HG00590" - }, - { - "id": "HG00598" - }, - { - "id": "HG00607" - }, - { - "id": "HG00619" - }, - { - "id": "HG00623" - }, - { - "id": "HG00657" - }, - { - "id": "HG00663" - }, - { - "id": "HG00704" - }, - { - "id": "HG00705" - }, - { - "id": "HG00733" - }, - { - "id": "HG00864" - }, - { - "id": "HG00881" - }, - { - "id": "HG01052" - }, - { - "id": "HG01070" - }, - { - "id": "HG01075" - }, - { - "id": "HG01164" - }, - { - "id": "HG01174" - }, - { - "id": "HG01241" - }, - { - "id": "HG01248" - }, - { - "id": "HG01256" - }, - { - "id": "HG01275" - }, - { - "id": "HG01284" - }, - { - "id": "HG01334" - }, - { - "id": "HG01348" - }, - { - "id": "HG01396" - }, - { - "id": "HG01443" - }, - { - "id": "HG01491" - }, - { - "id": "HG01498" - }, - { - "id": "HG01537" - }, - { - "id": "HG01572" - }, - { - "id": "HG01606" - }, - { - "id": "HG01623" - }, - { - "id": "HG01630" - }, - { - "id": "HG01783" - }, - { - "id": "HG01784" - }, - { - "id": "HG01790" - }, - { - "id": "HG01799" - }, - { - "id": "HG01801" - }, - { - "id": "HG01806" - }, - { - "id": "HG01812" - }, - { - "id": "HG01813" - }, - { - "id": "HG01817" - }, - { - "id": "HG01848" - }, - { - "id": "HG01849" - }, - { - "id": "HG01857" - }, - { - "id": "HG01863" - }, - { - "id": "HG01874" - }, - { - "id": "HG01915" - }, - { - "id": "HG01924" - }, - { - "id": "HG01965" - }, - { - "id": "HG01970" - }, - { - "id": "HG01991" - }, - { - "id": "HG02010" - }, - { - "id": "HG02020" - }, - { - "id": "HG02054" - }, - { - "id": "HG02086" - }, - { - "id": "HG02087" - }, - { - "id": "HG02116" - }, - { - "id": "HG02122" - }, - { - "id": "HG02130" - }, - { - "id": "HG02131" - }, - { - "id": "HG02152" - }, - { - "id": "HG02154" - }, - { - "id": "HG02165" - }, - { - "id": "HG02224" - }, - { - "id": "HG02232" - }, - { - "id": "HG02236" - }, - { - "id": "HG02250" - }, - { - "id": "HG02259" - }, - { - "id": "HG02298" - }, - { - "id": "HG02318" - }, - { - "id": "HG02345" - }, - { - "id": "HG02351" - }, - { - "id": "HG02363" - }, - { - "id": "HG02373" - }, - { - "id": "HG02383" - }, - { - "id": "HG02384" - }, - { - "id": "HG02386" - }, - { - "id": "HG02388" - }, - { - "id": "HG02389" - }, - { - "id": "HG02397" - }, - { - "id": "HG02419" - }, - { - "id": "HG02462" - }, - { - "id": "HG02464" - }, - { - "id": "HG02497" - }, - { - "id": "HG02511" - }, - { - "id": "HG02521" - }, - { - "id": "HG02561" - }, - { - "id": "HG02574" - }, - { - "id": "HG02580" - }, - { - "id": "HG02595" - }, - { - "id": "HG02603" - }, - { - "id": "HG02629" - }, - { - "id": "HG02651" - }, - { - "id": "HG02682" - }, - { - "id": "HG02688" - }, - { - "id": "HG02690" - }, - { - "id": "HG02699" - }, - { - "id": "HG02760" - }, - { - "id": "HG02768" - }, - { - "id": "HG02771" - }, - { - "id": "HG02792" - }, - { - "id": "HG02798" - }, - { - "id": "HG02811" - }, - { - "id": "HG02814" - }, - { - "id": "HG02840" - }, - { - "id": "HG02870" - }, - { - "id": "HG02881" - }, - { - "id": "HG02970" - }, - { - "id": "HG02973" - }, - { - "id": "HG03009" - }, - { - "id": "HG03046" - }, - { - "id": "HG03074" - }, - { - "id": "HG03091" - }, - { - "id": "HG03105" - }, - { - "id": "HG03127" - }, - { - "id": "HG03193" - }, - { - "id": "HG03224" - }, - { - "id": "HG03237" - }, - { - "id": "HG03241" - }, - { - "id": "HG03247" - }, - { - "id": "HG03259" - }, - { - "id": "HG03267" - }, - { - "id": "HG03354" - }, - { - "id": "HG03366" - }, - { - "id": "HG03367" - }, - { - "id": "HG03380" - }, - { - "id": "HG03419" - }, - { - "id": "HG03449" - }, - { - "id": "HG03451" - }, - { - "id": "HG03458" - }, - { - "id": "HG03490" - }, - { - "id": "HG03491" - }, - { - "id": "HG03511" - }, - { - "id": "HG03556" - }, - { - "id": "HG03563" - }, - { - "id": "HG03598" - }, - { - "id": "HG03603" - }, - { - "id": "HG03607" - }, - { - "id": "HG03636" - }, - { - "id": "HG03684" - }, - { - "id": "HG03686" - }, - { - "id": "HG03690" - }, - { - "id": "HG03731" - }, - { - "id": "HG03740" - }, - { - "id": "HG03755" - }, - { - "id": "HG03800" - }, - { - "id": "HG03815" - }, - { - "id": "HG03832" - }, - { - "id": "HG03850" - }, - { - "id": "HG03873" - }, - { - "id": "HG03897" - }, - { - "id": "HG03905" - }, - { - "id": "HG03937" - }, - { - "id": "HG03948" - }, - { - "id": "HG03973" - }, - { - "id": "HG04054" - }, - { - "id": "HG04059" - }, - { - "id": "HG04063" - }, - { - "id": "HG04096" - }, - { - "id": "HG04099" - }, - { - "id": "HG04140" - }, - { - "id": "HG04171" - }, - { - "id": "HG04209" - }, - { - "id": "HG04210" - }, - { - "id": "HG04229" - }, - { - "id": "HG04239" - }, - { - "id": "NA07347" - }, - { - "id": "NA11918" - }, - { - "id": "NA11919" - }, - { - "id": "NA12045" - }, - { - "id": "NA12273" - }, - { - "id": "NA12342" - }, - { - "id": "NA12414" - }, - { - "id": "NA12546" - }, - { - "id": "NA12760" - }, - { - "id": "NA12878" - }, - { - "id": "NA18516" - }, - { - "id": "NA18525" - }, - { - "id": "NA18534" - }, - { - "id": "NA18541" - }, - { - "id": "NA18557" - }, - { - "id": "NA18565" - }, - { - "id": "NA18616" - }, - { - "id": "NA18619" - }, - { - "id": "NA18623" - }, - { - "id": "NA18630" - }, - { - "id": "NA18631" - }, - { - "id": "NA18740" - }, - { - "id": "NA18853" - }, - { - "id": "NA18865" - }, - { - "id": "NA18873" - }, - { - "id": "NA18874" - }, - { - "id": "NA18916" - }, - { - "id": "NA18960" - }, - { - "id": "NA18966" - }, - { - "id": "NA18975" - }, - { - "id": "NA18976" - }, - { - "id": "NA18978" - }, - { - "id": "NA18990" - }, - { - "id": "NA19060" - }, - { - "id": "NA19063" - }, - { - "id": "NA19076" - }, - { - "id": "NA19086" - }, - { - "id": "NA19087" - }, - { - "id": "NA19096" - }, - { - "id": "NA19113" - }, - { - "id": "NA19118" - }, - { - "id": "NA19185" - }, - { - "id": "NA19209" - }, - { - "id": "NA19311" - }, - { - "id": "NA19314" - }, - { - "id": "NA19317" - }, - { - "id": "NA19321" - }, - { - "id": "NA19379" - }, - { - "id": "NA19384" - }, - { - "id": "NA19390" - }, - { - "id": "NA19397" - }, - { - "id": "NA19399" - }, - { - "id": "NA19404" - }, - { - "id": "NA19446" - }, - { - "id": "NA19448" - }, - { - "id": "NA19455" - }, - { - "id": "NA19456" - }, - { - "id": "NA19466" - }, - { - "id": "NA19655" - }, - { - "id": "NA19657" - }, - { - "id": "NA19670" - }, - { - "id": "NA19678" - }, - { - "id": "NA19679" - }, - { - "id": "NA19701" - }, - { - "id": "NA19720" - }, - { - "id": "NA19756" - }, - { - "id": "NA19761" - }, - { - "id": "NA19764" - }, - { - "id": "NA19786" - }, - { - "id": "NA20318" - }, - { - "id": "NA20351" - }, - { - "id": "NA20517" - }, - { - "id": "NA20518" - }, - { - "id": "NA20529" - }, - { - "id": "NA20587" - }, - { - "id": "NA20757" - }, - { - "id": "NA20798" - }, - { - "id": "NA20799" - }, - { - "id": "NA20800" - }, - { - "id": "NA20810" - }, - { - "id": "NA20826" - }, - { - "id": "NA20858" - }, - { - "id": "NA20864" - }, - { - "id": "NA20869" - }, - { - "id": "NA20877" - }, - { - "id": "NA20888" - }, - { - "id": "NA20910" - }, - { - "id": "NA21101" - }, - { - "id": "NA21113" - }, - { - "id": "NA21114" - }, - { - "id": "NA21116" - }, - { - "id": "NA21118" - }, - { - "id": "NA21133" - }, - { - "id": "NA21143" - } - ], - "contigs": [ - { - "id": "1", - "length": 249250621 - }, - { - "id": "2", - "length": 243199373 - }, - { - "id": "3", - "length": 198022430 - }, - { - "id": "4", - "length": 191154276 - }, - { - "id": "5", - "length": 180915260 - }, - { - "id": "6", - "length": 171115067 - }, - { - "id": "7", - "length": 159138663 - }, - { - "id": "8", - "length": 146364022 - }, - { - "id": "9", - "length": 141213431 - }, - { - "id": "10", - "length": 135534747 - }, - { - "id": "11", - "length": 135006516 - }, - { - "id": "12", - "length": 133851895 - }, - { - "id": "13", - "length": 115169878 - }, - { - "id": "14", - "length": 107349540 - }, - { - "id": "15", - "length": 102531392 - }, - { - "id": "16", - "length": 90354753 - }, - { - "id": "17", - "length": 81195210 - }, - { - "id": "18", - "length": 78077248 - }, - { - "id": "19", - "length": 59128983 - }, - { - "id": "20", - "length": 63025520 - }, - { - "id": "21", - "length": 48129895 - }, - { - "id": "22", - "length": 51304566 - }, - { - "id": "X", - "length": 155270560 - }, - { - "id": "Y", - "length": 59373566 - }, - { - "id": "MT", - "length": 16569 - }, - { - "id": "GL000207.1", - "length": 4262 - }, - { - "id": "GL000226.1", - "length": 15008 - }, - { - "id": "GL000229.1", - "length": 19913 - }, - { - "id": "GL000231.1", - "length": 27386 - }, - { - "id": "GL000210.1", - "length": 27682 - }, - { - "id": "GL000239.1", - "length": 33824 - }, - { - "id": "GL000235.1", - "length": 34474 - }, - { - "id": "GL000201.1", - "length": 36148 - }, - { - "id": "GL000247.1", - "length": 36422 - }, - { - "id": "GL000245.1", - "length": 36651 - }, - { - "id": "GL000197.1", - "length": 37175 - }, - { - "id": "GL000203.1", - "length": 37498 - }, - { - "id": "GL000246.1", - "length": 38154 - }, - { - "id": "GL000249.1", - "length": 38502 - }, - { - "id": "GL000196.1", - "length": 38914 - }, - { - "id": "GL000248.1", - "length": 39786 - }, - { - "id": "GL000244.1", - "length": 39929 - }, - { - "id": "GL000238.1", - "length": 39939 - }, - { - "id": "GL000202.1", - "length": 40103 - }, - { - "id": "GL000234.1", - "length": 40531 - }, - { - "id": "GL000232.1", - "length": 40652 - }, - { - "id": "GL000206.1", - "length": 41001 - }, - { - "id": "GL000240.1", - "length": 41933 - }, - { - "id": "GL000236.1", - "length": 41934 - }, - { - "id": "GL000241.1", - "length": 42152 - }, - { - "id": "GL000243.1", - "length": 43341 - }, - { - "id": "GL000242.1", - "length": 43523 - }, - { - "id": "GL000230.1", - "length": 43691 - }, - { - "id": "GL000237.1", - "length": 45867 - }, - { - "id": "GL000233.1", - "length": 45941 - }, - { - "id": "GL000204.1", - "length": 81310 - }, - { - "id": "GL000198.1", - "length": 90085 - }, - { - "id": "GL000208.1", - "length": 92689 - }, - { - "id": "GL000191.1", - "length": 106433 - }, - { - "id": "GL000227.1", - "length": 128374 - }, - { - "id": "GL000228.1", - "length": 129120 - }, - { - "id": "GL000214.1", - "length": 137718 - }, - { - "id": "GL000221.1", - "length": 155397 - }, - { - "id": "GL000209.1", - "length": 159169 - }, - { - "id": "GL000218.1", - "length": 161147 - }, - { - "id": "GL000220.1", - "length": 161802 - }, - { - "id": "GL000213.1", - "length": 164239 - }, - { - "id": "GL000211.1", - "length": 166566 - }, - { - "id": "GL000199.1", - "length": 169874 - }, - { - "id": "GL000217.1", - "length": 172149 - }, - { - "id": "GL000216.1", - "length": 172294 - }, - { - "id": "GL000215.1", - "length": 172545 - }, - { - "id": "GL000205.1", - "length": 174588 - }, - { - "id": "GL000219.1", - "length": 179198 - }, - { - "id": "GL000224.1", - "length": 179693 - }, - { - "id": "GL000223.1", - "length": 180455 - }, - { - "id": "GL000195.1", - "length": 182896 - }, - { - "id": "GL000212.1", - "length": 186858 - }, - { - "id": "GL000222.1", - "length": 186861 - }, - { - "id": "GL000200.1", - "length": 187035 - }, - { - "id": "GL000193.1", - "length": 189789 - }, - { - "id": "GL000194.1", - "length": 191469 - }, - { - "id": "GL000225.1", - "length": 211173 - }, - { - "id": "GL000192.1", - "length": 547496 + "format_version": "0.6", + "dimensions": { + "variants": { + "size": 10879, + "chunk_size": 1000 + }, + "samples": { + "size": 284, + "chunk_size": 284 + }, + "alleles": { + "size": 2, + "chunk_size": 2 + }, + "alt_alleles": { + "size": 1, + "chunk_size": 1 + }, + "filters": { + "size": 1, + "chunk_size": 1 + }, + "ploidy": { + "size": 2, + "chunk_size": 2 + }, + "genotypes": { + "size": 3, + "chunk_size": 3 } - ], - "filters": [ - { - "id": "PASS", - "description": "All filters passed" - } - ], + }, "fields": [ { "name": "variant_contig", "dtype": "i1", - "shape": [ - 10879 - ], - "chunks": [ - 10000 - ], "dimensions": [ "variants" ], "description": "An identifier from the reference genome or an angle-bracketed ID string pointing to a contig in the assembly file", - "vcf_field": null, - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": null }, { "name": "variant_filter", "dtype": "bool", - "shape": [ - 10879, - 1 - ], - "chunks": [ - 10000, - 1 - ], "dimensions": [ "variants", "filters" ], "description": "Filter status of the variant", - "vcf_field": null, "compressor": { "id": "blosc", "cname": "zstd", @@ -1248,71 +57,50 @@ "shuffle": 2, "blocksize": 0 }, - "filters": [] + "filters": null, + "source": null }, { "name": "variant_allele", "dtype": "O", - "shape": [ - 10879, - 2 - ], - "chunks": [ - 10000, - 2 - ], "dimensions": [ "variants", "alleles" ], "description": "List of the reference and alternate alleles", - "vcf_field": null, - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": null + }, + { + "name": "variant_length", + "dtype": "i1", + "dimensions": [ + "variants" + ], + "description": "The length of the variant measured in bases", + "compressor": null, + "filters": null, + "source": null }, { "name": "variant_id", "dtype": "O", - "shape": [ - 10879 - ], - "chunks": [ - 10000 - ], "dimensions": [ "variants" ], "description": "List of unique identifiers where applicable", - "vcf_field": null, - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": null }, { "name": "variant_id_mask", "dtype": "bool", - "shape": [ - 10879 - ], - "chunks": [ - 10000 - ], "dimensions": [ "variants" ], "description": "", - "vcf_field": null, "compressor": { "id": "blosc", "cname": "zstd", @@ -1320,175 +108,274 @@ "shuffle": 2, "blocksize": 0 }, - "filters": [] + "filters": null, + "source": null }, { "name": "variant_quality", "dtype": "f4", - "shape": [ - 10879 - ], - "chunks": [ - 10000 - ], "dimensions": [ "variants" ], "description": "Phred-scaled quality score", - "vcf_field": "QUAL", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": "QUAL" }, { "name": "variant_position", "dtype": "i4", - "shape": [ - 10879 + "dimensions": [ + "variants" ], - "chunks": [ - 10000 + "description": "The reference position", + "compressor": null, + "filters": null, + "source": "POS" + }, + { + "name": "variant_AC", + "dtype": "i2", + "dimensions": [ + "variants" ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/AC" + }, + { + "name": "variant_AF", + "dtype": "f4", "dimensions": [ "variants" ], - "description": "The reference position", - "vcf_field": "POS", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/AF" }, { - "name": "variant_length", - "dtype": "i1", - "shape": [ - 10879 + "name": "variant_AN", + "dtype": "i2", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/AN" + }, + { + "name": "variant_BaseQRankSum", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/BaseQRankSum" + }, + { + "name": "variant_ClippingRankSum", + "dtype": "f4", + "dimensions": [ + "variants" ], - "chunks": [ - 10000 + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/ClippingRankSum" + }, + { + "name": "variant_DP", + "dtype": "i4", + "dimensions": [ + "variants" ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/DP" + }, + { + "name": "variant_DS", + "dtype": "bool", "dimensions": [ "variants" ], - "description": "The length of the variant measured in bases", - "vcf_field": "rlen", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/DS" }, { - "name": "call_AD", + "name": "variant_FS", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/FS" + }, + { + "name": "variant_HaplotypeScore", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/HaplotypeScore" + }, + { + "name": "variant_InbreedingCoeff", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/InbreedingCoeff" + }, + { + "name": "variant_MLEAC", + "dtype": "i2", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/MLEAC" + }, + { + "name": "variant_MLEAF", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/MLEAF" + }, + { + "name": "variant_MQ", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/MQ" + }, + { + "name": "variant_MQ0", "dtype": "i1", - "shape": [ - 10879, - 284, - 2 + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/MQ0" + }, + { + "name": "variant_MQRankSum", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/MQRankSum" + }, + { + "name": "variant_QD", + "dtype": "f4", + "dimensions": [ + "variants" + ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/QD" + }, + { + "name": "variant_ReadPosRankSum", + "dtype": "f4", + "dimensions": [ + "variants" ], - "chunks": [ - 10000, - 1000, - 2 + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/ReadPosRankSum" + }, + { + "name": "variant_set", + "dtype": "O", + "dimensions": [ + "variants" ], + "description": "", + "compressor": null, + "filters": null, + "source": "INFO/set" + }, + { + "name": "call_AD", + "dtype": "i1", "dimensions": [ "variants", "samples", "alleles" ], "description": "", - "vcf_field": "FORMAT/AD", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": "FORMAT/AD" }, { "name": "call_DP", "dtype": "i1", - "shape": [ - 10879, - 284 - ], - "chunks": [ - 10000, - 1000 - ], "dimensions": [ "variants", "samples" ], "description": "", - "vcf_field": "FORMAT/DP", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": "FORMAT/DP" }, { "name": "call_GQ", "dtype": "i1", - "shape": [ - 10879, - 284 - ], - "chunks": [ - 10000, - 1000 - ], "dimensions": [ "variants", "samples" ], "description": "", - "vcf_field": "FORMAT/GQ", - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 7, - "shuffle": 0, - "blocksize": 0 - }, - "filters": [] + "compressor": null, + "filters": null, + "source": "FORMAT/GQ" }, { "name": "call_genotype_phased", "dtype": "bool", - "shape": [ - 10879, - 284 - ], - "chunks": [ - 10000, - 1000 - ], "dimensions": [ "variants", "samples" ], "description": "", - "vcf_field": null, "compressor": { "id": "blosc", "cname": "zstd", @@ -1496,28 +383,18 @@ "shuffle": 2, "blocksize": 0 }, - "filters": [] + "filters": null, + "source": null }, { "name": "call_genotype", "dtype": "i1", - "shape": [ - 10879, - 284, - 2 - ], - "chunks": [ - 10000, - 1000, - 2 - ], "dimensions": [ "variants", "samples", "ploidy" ], "description": "", - "vcf_field": null, "compressor": { "id": "blosc", "cname": "zstd", @@ -1525,28 +402,18 @@ "shuffle": 2, "blocksize": 0 }, - "filters": [] + "filters": null, + "source": null }, { "name": "call_genotype_mask", "dtype": "bool", - "shape": [ - 10879, - 284, - 2 - ], - "chunks": [ - 10000, - 1000, - 2 - ], "dimensions": [ "variants", "samples", "ploidy" ], "description": "", - "vcf_field": null, "compressor": { "id": "blosc", "cname": "zstd", @@ -1554,7 +421,18 @@ "shuffle": 2, "blocksize": 0 }, - "filters": [] + "filters": null, + "source": null } - ] + ], + "defaults": { + "compressor": { + "id": "blosc", + "cname": "zstd", + "clevel": 7, + "shuffle": 1, + "blocksize": 0 + }, + "filters": [] + } } \ No newline at end of file diff --git a/docs/examples/gwas_tutorial.ipynb b/docs/examples/gwas_tutorial.ipynb index b0d45f25f..05fabc551 100644 --- a/docs/examples/gwas_tutorial.ipynb +++ b/docs/examples/gwas_tutorial.ipynb @@ -20039,7 +20039,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -20053,7 +20053,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.20" } }, "nbformat": 4, diff --git a/requirements-dev.txt b/requirements-dev.txt index e338c94be..8e856521a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -13,7 +13,7 @@ partd bed-reader rechunker cbgen > 1.0.5 -bio2zarr; platform_system != "Windows" +bio2zarr[vcf]; platform_system != "Windows" yarl matplotlib asv diff --git a/requirements-numpy1-dev.txt b/requirements-numpy1-dev.txt index d1df99192..ed0c5b629 100644 --- a/requirements-numpy1-dev.txt +++ b/requirements-numpy1-dev.txt @@ -13,7 +13,7 @@ partd bed-reader rechunker cbgen < 1.0.5 -bio2zarr; platform_system != "Windows" +bio2zarr[vcf]; platform_system != "Windows" yarl matplotlib asv diff --git a/sgkit/tests/io/test_vcf2zarr_compat.py b/sgkit/tests/io/test_vcf2zarr_compat.py index 696180fed..7f185763e 100644 --- a/sgkit/tests/io/test_vcf2zarr_compat.py +++ b/sgkit/tests/io/test_vcf2zarr_compat.py @@ -2,7 +2,7 @@ import pytest pytest.importorskip("bio2zarr") -from bio2zarr import vcf2zarr +from bio2zarr import vcf from bio2zarr.constants import ( FLOAT32_FILL, FLOAT32_MISSING, @@ -23,7 +23,7 @@ def test_vcf2zarr_compat(shared_datadir, tmp_path): vcf_path = shared_datadir / "sample.vcf.gz" vcz_path = tmp_path.joinpath("sample.vcz").as_posix() - vcf2zarr.convert( + vcf.convert( [vcf_path], vcz_path, variants_chunk_size=5,