4
4
import numpy as np
5
5
import zarr
6
6
import bed_reader
7
+ import numcodecs
7
8
8
9
from . import core
9
10
@@ -82,11 +83,16 @@ def convert(
82
83
chunks = [variants_chunk_size , samples_chunk_size ]
83
84
dimensions = ["variants" , "samples" ]
84
85
86
+ # TODO: reuse some of the vcfzarr logic for laying out the basic
87
+ # dataset here, and use the schema generator. Currently the Blosc
88
+ # settings below are not the best choice for genotype data.
89
+ default_compressor = numcodecs .Blosc (cname = "zstd" , clevel = 7 )
90
+
85
91
a = root .array (
86
92
"sample_id" ,
87
93
bed .iid ,
88
94
dtype = "str" ,
89
- compressor = core . default_compressor ,
95
+ compressor = default_compressor ,
90
96
chunks = (samples_chunk_size ,),
91
97
)
92
98
a .attrs ["_ARRAY_DIMENSIONS" ] = ["samples" ]
@@ -98,7 +104,7 @@ def convert(
98
104
"variant_position" ,
99
105
bed .bp_position ,
100
106
dtype = np .int32 ,
101
- compressor = core . default_compressor ,
107
+ compressor = default_compressor ,
102
108
chunks = (variants_chunk_size ,),
103
109
)
104
110
a .attrs ["_ARRAY_DIMENSIONS" ] = ["variants" ]
@@ -109,7 +115,7 @@ def convert(
109
115
"variant_allele" ,
110
116
alleles ,
111
117
dtype = "str" ,
112
- compressor = core . default_compressor ,
118
+ compressor = default_compressor ,
113
119
chunks = (variants_chunk_size ,),
114
120
)
115
121
a .attrs ["_ARRAY_DIMENSIONS" ] = ["variants" , "alleles" ]
@@ -121,7 +127,7 @@ def convert(
121
127
dtype = "bool" ,
122
128
shape = list (shape ),
123
129
chunks = list (chunks ),
124
- compressor = core . default_compressor ,
130
+ compressor = default_compressor ,
125
131
)
126
132
a .attrs ["_ARRAY_DIMENSIONS" ] = list (dimensions )
127
133
@@ -132,7 +138,7 @@ def convert(
132
138
dtype = "i1" ,
133
139
shape = list (shape ),
134
140
chunks = list (chunks ),
135
- compressor = core . default_compressor ,
141
+ compressor = default_compressor ,
136
142
)
137
143
a .attrs ["_ARRAY_DIMENSIONS" ] = list (dimensions )
138
144
@@ -141,7 +147,7 @@ def convert(
141
147
dtype = "bool" ,
142
148
shape = list (shape ),
143
149
chunks = list (chunks ),
144
- compressor = core . default_compressor ,
150
+ compressor = default_compressor ,
145
151
)
146
152
a .attrs ["_ARRAY_DIMENSIONS" ] = list (dimensions )
147
153
0 commit comments