Skip to content

Commit 9f817a2

Browse files
Logging improvements
1 parent 524fcb2 commit 9f817a2

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

bio2zarr/core.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,12 @@ def flush(self):
179179
f"{self.array_offset}:{self.array_offset + self.buffer_row}"
180180
f"{self.buff.nbytes / 2**20: .2f}Mb"
181181
)
182-
self.max_buff_size = max(self.max_buff_size, sys.getsizeof(self.buff))
182+
# Note this is inaccurate for string data as we're just reporting the
183+
# size of the container. When we switch to the numpy 2 StringDtype this
184+
# should improve and we can get more visibility on how memory
185+
# is being used.
186+
# https://github.com/sgkit-dev/bio2zarr/issues/30
187+
self.max_buff_size = max(self.max_buff_size, self.buff.nbytes)
183188
self.array_offset += self.variants_chunk_size
184189
self.buffer_row = 0
185190

bio2zarr/vcf2zarr/vcz.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -862,13 +862,9 @@ def init_partition_array(self, partition_index, name):
862862

863863
def finalise_partition_array(self, partition_index, buffered_array):
864864
buffered_array.flush()
865-
# field_map = self.schema.field_map()
866-
# array_spec = field_map[buffered_array.name]
867-
# ba = buffered_array
868-
# print(array_spec.name, "ba.max_buff_size", ba.max_buff_size,
869-
# array_spec.variant_chunk_nbytes)
870865
logger.info(
871-
f"Completed partition {partition_index} array {buffered_array.name}"
866+
f"Completed partition {partition_index} array {buffered_array.name} "
867+
f"max_memory={core.display_size(buffered_array.max_buff_size)}"
872868
)
873869

874870
def encode_array_partition(self, array_spec, partition_index):

0 commit comments

Comments
 (0)