Skip to content

Commit d08b24d

Browse files
Move display functions into core
1 parent 0aef4e0 commit d08b24d

File tree

2 files changed

+29
-25
lines changed

2 files changed

+29
-25
lines changed

bio2zarr/core.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
import dataclasses
44
import json
55
import logging
6+
import math
67
import multiprocessing
78
import os
89
import os.path
910
import threading
1011
import time
1112

13+
import humanfriendly
1214
import numcodecs
1315
import numpy as np
1416
import tqdm
@@ -19,6 +21,17 @@
1921
numcodecs.blosc.use_threads = False
2022

2123

24+
def display_number(x):
25+
ret = "n/a"
26+
if math.isfinite(x):
27+
ret = f"{x: 0.2g}"
28+
return ret
29+
30+
31+
def display_size(n):
32+
return humanfriendly.format_size(n, binary=True)
33+
34+
2235
def min_int_dtype(min_value, max_value):
2336
if min_value > max_value:
2437
raise ValueError("min_value must be <= max_value")

bio2zarr/vcf.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,6 @@
2323
logger = logging.getLogger(__name__)
2424

2525

26-
def display_number(x):
27-
ret = "n/a"
28-
if math.isfinite(x):
29-
ret = f"{x: 0.2g}"
30-
return ret
31-
32-
33-
def display_size(n):
34-
return humanfriendly.format_size(n, binary=True)
35-
36-
3726
@dataclasses.dataclass
3827
class VcfFieldSummary(core.JsonDataclass):
3928
num_chunks: int = 0
@@ -874,11 +863,11 @@ def summary_table(self):
874863
"name": name,
875864
"type": col.vcf_field.vcf_type,
876865
"chunks": summary.num_chunks,
877-
"size": display_size(summary.uncompressed_size),
878-
"compressed": display_size(summary.compressed_size),
866+
"size": core.display_size(summary.uncompressed_size),
867+
"compressed": core.display_size(summary.compressed_size),
879868
"max_n": summary.max_number,
880-
"min_val": display_number(summary.min_value),
881-
"max_val": display_number(summary.max_value),
869+
"min_val": core.display_number(summary.min_value),
870+
"max_val": core.display_number(summary.max_value),
882871
}
883872

884873
data.append(d)
@@ -1546,12 +1535,12 @@ def summary_table(self):
15461535
d = {
15471536
"name": array.name,
15481537
"dtype": str(array.dtype),
1549-
"stored": display_size(stored),
1550-
"size": display_size(array.nbytes),
1551-
"ratio": display_number(array.nbytes / stored),
1538+
"stored": core.display_size(stored),
1539+
"size": core.display_size(array.nbytes),
1540+
"ratio": core.display_number(array.nbytes / stored),
15521541
"nchunks": str(array.nchunks),
1553-
"chunk_size": display_size(array.nbytes / array.nchunks),
1554-
"avg_chunk_stored": display_size(int(stored / array.nchunks)),
1542+
"chunk_size": core.display_size(array.nbytes / array.nchunks),
1543+
"avg_chunk_stored": core.display_size(int(stored / array.nchunks)),
15551544
"shape": str(array.shape),
15561545
"chunk_shape": str(array.chunks),
15571546
"compressor": str(array.compressor),
@@ -1567,7 +1556,7 @@ def parse_max_memory(max_memory):
15671556
return 2**63
15681557
if isinstance(max_memory, str):
15691558
max_memory = humanfriendly.parse_size(max_memory)
1570-
logger.info(f"Set memory budget to {display_size(max_memory)}")
1559+
logger.info(f"Set memory budget to {core.display_size(max_memory)}")
15711560
return max_memory
15721561

15731562

@@ -1721,7 +1710,7 @@ def init(
17211710
num_samples=self.icf.num_samples,
17221711
num_partitions=self.num_partitions,
17231712
num_chunks=total_chunks,
1724-
max_encoding_memory=display_size(self.get_max_encoding_memory()),
1713+
max_encoding_memory=core.display_size(self.get_max_encoding_memory()),
17251714
)
17261715

17271716
def encode_samples(self, root):
@@ -2082,7 +2071,7 @@ def encode_all_partitions(
20822071
per_worker_memory = self.get_max_encoding_memory()
20832072
logger.info(
20842073
f"Encoding Zarr over {num_partitions} partitions with "
2085-
f"{worker_processes} workers and {display_size(per_worker_memory)} "
2074+
f"{worker_processes} workers and {core.display_size(per_worker_memory)} "
20862075
"per worker"
20872076
)
20882077
# Each partition requires per_worker_memory bytes, so to prevent more than
@@ -2091,12 +2080,14 @@ def encode_all_partitions(
20912080
if max_num_workers < worker_processes:
20922081
logger.warning(
20932082
f"Limiting number of workers to {max_num_workers} to "
2094-
f"keep within specified memory budget of {display_size(max_memory)}"
2083+
"keep within specified memory budget of "
2084+
f"{core.display_size(max_memory)}"
20952085
)
20962086
if max_num_workers <= 0:
20972087
raise ValueError(
20982088
f"Insufficient memory to encode a partition:"
2099-
f"{display_size(per_worker_memory)} > {display_size(max_memory)}"
2089+
f"{core.display_size(per_worker_memory)} > "
2090+
f"{core.display_size(max_memory)}"
21002091
)
21012092
num_workers = min(max_num_workers, worker_processes)
21022093

0 commit comments

Comments
 (0)