File tree Expand file tree Collapse file tree 2 files changed +17
-0
lines changed Expand file tree Collapse file tree 2 files changed +17
-0
lines changed Original file line number Diff line number Diff line change @@ -1172,6 +1172,19 @@ def create_array(self, variable):
1172
1172
a .attrs ["_ARRAY_DIMENSIONS" ] = variable .dimensions
1173
1173
1174
1174
def encode_column (self , pcvcf , column , encoder_threads = 4 ):
1175
+ # TODO we're doing this the wrong way at the moment, overcomplicating
1176
+ # things by having the ThreadedZarrEncoder. It would be simpler if
1177
+ # we split the columns into vertical chunks, and just pushed a bunch
1178
+ # of futures for encoding start:end slices of each column. The
1179
+ # complicating factor here is that we need to get these slices
1180
+ # out of the pcvcf, which takes a little bit of doing (but fine,
1181
+ # because we know the number of records in each partition).
1182
+ # An annoying factor then is how to update the progress meter
1183
+ # because the "bytes read" approach becomes problematic
1184
+ # when we might access the same chunk several times.
1185
+ # Would perhaps be better to call sys.getsizeof() on the stored
1186
+ # value each time.
1187
+
1175
1188
source_col = pcvcf .columns [column .vcf_field ]
1176
1189
array = self .root [column .name ]
1177
1190
ba = core .BufferedArray (array )
Original file line number Diff line number Diff line change 7
7
8
8
from bio2zarr import vcf
9
9
10
+ # TODO add support here for split vcfs. Perhaps simplest to take a
11
+ # directory provided as input as indicating this, and then have
12
+ # the original unsplit vs split files in there following some
13
+ # naming conventions.
10
14
11
15
@click .command
12
16
@click .argument ("vcfs" , nargs = - 1 )
You can’t perform that action at this time.
0 commit comments