Skip to content

Commit b10479e

Browse files
Chunk aligned slices
Encode progress seems to be broken
1 parent 01559ce commit b10479e

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

bio2zarr/core.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,10 @@ def progress_thread_worker(config):
146146
inc = current - pbar.n
147147
pbar.update(inc)
148148
time.sleep(config.poll_interval)
149+
# inc = config.total - pbar.n
150+
# pbar.update(inc)
149151
pbar.close()
152+
# print("EXITING PROGRESS THREAD")
150153

151154

152155
class ParallelWorkManager(contextlib.AbstractContextManager):

bio2zarr/vcf.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -712,7 +712,7 @@ def write_chunk(self):
712712
# Update the summary
713713
self.vcf_field.summary.num_chunks += 1
714714
self.vcf_field.summary.compressed_size += len(compressed)
715-
self.vcf_field.summary.uncompressed_size += len(pkl)
715+
self.vcf_field.summary.uncompressed_size += self.buffered_bytes
716716
logger.debug(f"Finish write: {path}")
717717

718718
def flush(self):
@@ -1269,12 +1269,13 @@ def encode_column_slice(self, pcvcf, column, start, stop):
12691269
sanitiser = source_col.sanitiser_factory(ba.buff.shape)
12701270

12711271
for value in source_col.iter_values(start, stop):
1272+
core.update_progress(sys.getsizeof(value))
12721273
# We write directly into the buffer in the sanitiser function
12731274
# to make it easier to reason about dimension padding
12741275
j = ba.next_buffer_row()
12751276
sanitiser(ba.buff, j, value)
1276-
core.update_progress(sys.getsizeof(value))
12771277
ba.flush()
1278+
logger.debug(f"{column.name} slice {start}:{stop} done")
12781279

12791280
def encode_genotypes_slice(self, pcvcf, start, stop):
12801281
source_col = pcvcf.columns["FORMAT/GT"]
@@ -1283,6 +1284,7 @@ def encode_genotypes_slice(self, pcvcf, start, stop):
12831284
gt_phased = core.BufferedArray(self.root["call_genotype_phased"], start)
12841285

12851286
for value in source_col.iter_values(start, stop):
1287+
core.update_progress(sys.getsizeof(value))
12861288
j = gt.next_buffer_row()
12871289
sanitise_value_int_2d(gt.buff, j, value[:, :-1])
12881290
j = gt_phased.next_buffer_row()
@@ -1291,10 +1293,10 @@ def encode_genotypes_slice(self, pcvcf, start, stop):
12911293
# with mixed ploidies?
12921294
j = gt_mask.next_buffer_row()
12931295
gt_mask.buff[j] = gt.buff[j] < 0
1294-
core.update_progress(sys.getsizeof(value))
12951296
gt.flush()
12961297
gt_phased.flush()
12971298
gt_mask.flush()
1299+
logger.debug(f"GT slice {start}:{stop} done")
12981300

12991301
def encode_alleles(self, pcvcf):
13001302
ref_col = pcvcf.columns["REF"]

0 commit comments

Comments
 (0)