3
3
import io
4
4
import re
5
5
import sys
6
+ from collections import deque
6
7
from datetime import datetime
7
8
8
9
import numpy as np
@@ -189,9 +190,12 @@ def write_vcf(
189
190
190
191
if variant_regions is None and variant_targets is None :
191
192
# no regions or targets selected
193
+ executor = concurrent .futures .ThreadPoolExecutor ()
194
+ preceding_future = None
192
195
for v_chunk in range (pos .cdata_shape [0 ]):
193
196
v_mask_chunk = filter_evaluator (v_chunk ) if filter_evaluator else None
194
- c_chunk_to_vcf (
197
+ future = executor .submit (
198
+ c_chunk_to_vcf ,
195
199
root ,
196
200
v_chunk ,
197
201
v_mask_chunk ,
@@ -201,7 +205,16 @@ def write_vcf(
201
205
output ,
202
206
drop_genotypes = drop_genotypes ,
203
207
no_update = no_update ,
208
+ executor = executor ,
204
209
)
210
+ if preceding_future :
211
+ concurrent .futures .wait ((preceding_future ,))
212
+ preceding_future = future
213
+ if preceding_future :
214
+ concurrent .futures .wait ((preceding_future ,))
215
+ # We can't use a with-statement because the threads
216
+ # themselves submit tasks to the executor.
217
+ executor .shutdown ()
205
218
else :
206
219
contigs_u = root ["contig_id" ][:].astype ("U" ).tolist ()
207
220
regions = parse_regions (variant_regions , contigs_u )
@@ -246,7 +259,8 @@ def write_vcf(
246
259
# Use zarr arrays to get mask chunks aligned with the main data
247
260
# for convenience.
248
261
z_variant_mask = zarr .array (variant_mask , chunks = pos .chunks [0 ])
249
-
262
+ executor = concurrent .futures .ThreadPoolExecutor ()
263
+ preceding_future = None
250
264
for i , v_chunk in enumerate (chunk_indexes ):
251
265
v_mask_chunk = z_variant_mask .blocks [i ]
252
266
@@ -255,7 +269,8 @@ def write_vcf(
255
269
v_mask_chunk , filter_evaluator (v_chunk )
256
270
)
257
271
if np .any (v_mask_chunk ):
258
- c_chunk_to_vcf (
272
+ future = executor .submit (
273
+ c_chunk_to_vcf ,
259
274
root ,
260
275
v_chunk ,
261
276
v_mask_chunk ,
@@ -265,7 +280,16 @@ def write_vcf(
265
280
output ,
266
281
drop_genotypes = drop_genotypes ,
267
282
no_update = no_update ,
283
+ executor = executor ,
268
284
)
285
+ if preceding_future :
286
+ concurrent .futures .wait ((preceding_future ,))
287
+ preceding_future = future
288
+ if preceding_future :
289
+ concurrent .futures .wait ((preceding_future ,))
290
+ # We can't use a with-statement because the threads
291
+ # themselves submit tasks to the executor.
292
+ executor .shutdown ()
269
293
270
294
271
295
def get_vchunk_array (zarray , v_chunk , mask , samples_selection = None ):
@@ -292,6 +316,7 @@ def c_chunk_to_vcf(
292
316
* ,
293
317
drop_genotypes ,
294
318
no_update ,
319
+ executor : concurrent .futures .Executor ,
295
320
):
296
321
chrom = None
297
322
pos = None
@@ -384,28 +409,29 @@ def load_gt_phased():
384
409
nonlocal gt_phased
385
410
gt_phased = np .zeros_like (gt , dtype = bool )
386
411
387
- with concurrent .futures .ThreadPoolExecutor () as executor :
388
- executor .submit (load_chrom )
389
- executor .submit (load_pos )
390
- executor .submit (load_id )
391
- executor .submit (load_alleles )
392
- executor .submit (load_qual )
393
- executor .submit (load_filter )
394
-
395
- for name , zarray in root .items ():
396
- if (
397
- name .startswith ("call_" )
398
- and not name .startswith ("call_genotype" )
399
- and num_samples != 0
400
- ):
401
- executor .submit (load_format_field , name , zarray )
402
- if num_samples is None :
403
- num_samples = zarray .shape [1 ]
404
- elif name .startswith ("variant_" ) and name not in RESERVED_VARIABLE_NAMES :
405
- executor .submit (load_info_field , name , zarray )
406
-
407
- executor .submit (load_gt )
408
- executor .submit (load_gt_phased )
412
+ futures = deque ()
413
+ futures .append (executor .submit (load_chrom ))
414
+ futures .append (executor .submit (load_pos ))
415
+ futures .append (executor .submit (load_id ))
416
+ futures .append (executor .submit (load_alleles ))
417
+ futures .append (executor .submit (load_qual ))
418
+ futures .append (executor .submit (load_filter ))
419
+
420
+ for name , zarray in root .items ():
421
+ if (
422
+ name .startswith ("call_" )
423
+ and not name .startswith ("call_genotype" )
424
+ and num_samples != 0
425
+ ):
426
+ futures .append (executor .submit (load_format_field , name , zarray ))
427
+ if num_samples is None :
428
+ num_samples = zarray .shape [1 ]
429
+ elif name .startswith ("variant_" ) and name not in RESERVED_VARIABLE_NAMES :
430
+ futures .append (executor .submit (load_info_field , name , zarray ))
431
+
432
+ futures .append (executor .submit (load_gt ))
433
+ futures .append (executor .submit (load_gt_phased ))
434
+ concurrent .futures .wait (futures )
409
435
410
436
ref = alleles [:, 0 ].astype ("S" )
411
437
alt = alleles [:, 1 :].astype ("S" )
0 commit comments