@@ -712,7 +712,7 @@ def write_chunk(self):
712
712
# Update the summary
713
713
self .vcf_field .summary .num_chunks += 1
714
714
self .vcf_field .summary .compressed_size += len (compressed )
715
- self .vcf_field .summary .uncompressed_size += len ( pkl )
715
+ self .vcf_field .summary .uncompressed_size += self . buffered_bytes
716
716
logger .debug (f"Finish write: { path } " )
717
717
718
718
def flush (self ):
@@ -1269,12 +1269,13 @@ def encode_column_slice(self, pcvcf, column, start, stop):
1269
1269
sanitiser = source_col .sanitiser_factory (ba .buff .shape )
1270
1270
1271
1271
for value in source_col .iter_values (start , stop ):
1272
+ core .update_progress (sys .getsizeof (value ))
1272
1273
# We write directly into the buffer in the sanitiser function
1273
1274
# to make it easier to reason about dimension padding
1274
1275
j = ba .next_buffer_row ()
1275
1276
sanitiser (ba .buff , j , value )
1276
- core .update_progress (sys .getsizeof (value ))
1277
1277
ba .flush ()
1278
+ logger .debug (f"{ column .name } slice { start } :{ stop } done" )
1278
1279
1279
1280
def encode_genotypes_slice (self , pcvcf , start , stop ):
1280
1281
source_col = pcvcf .columns ["FORMAT/GT" ]
@@ -1283,6 +1284,7 @@ def encode_genotypes_slice(self, pcvcf, start, stop):
1283
1284
gt_phased = core .BufferedArray (self .root ["call_genotype_phased" ], start )
1284
1285
1285
1286
for value in source_col .iter_values (start , stop ):
1287
+ core .update_progress (sys .getsizeof (value ))
1286
1288
j = gt .next_buffer_row ()
1287
1289
sanitise_value_int_2d (gt .buff , j , value [:, :- 1 ])
1288
1290
j = gt_phased .next_buffer_row ()
@@ -1291,10 +1293,10 @@ def encode_genotypes_slice(self, pcvcf, start, stop):
1291
1293
# with mixed ploidies?
1292
1294
j = gt_mask .next_buffer_row ()
1293
1295
gt_mask .buff [j ] = gt .buff [j ] < 0
1294
- core .update_progress (sys .getsizeof (value ))
1295
1296
gt .flush ()
1296
1297
gt_phased .flush ()
1297
1298
gt_mask .flush ()
1299
+ logger .debug (f"GT slice { start } :{ stop } done" )
1298
1300
1299
1301
def encode_alleles (self , pcvcf ):
1300
1302
ref_col = pcvcf .columns ["REF" ]
0 commit comments