Skip to content

Commit 5cb84db

Browse files
Merge pull request #195 from jeromekelleher/refactor-package-structure
Refactor package structure
2 parents: 7520af5 + 49e75c7; commit: 5cb84db

File tree

11 files changed

+1646
-1651
lines changed

11 files changed

+1646
-1651
lines changed

bio2zarr/cli.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import numcodecs
99
import tabulate
1010

11-
from . import plink, provenance, vcf, vcf_utils
11+
from . import icf, plink, provenance, vcf, vcf_utils
1212

1313
logger = logging.getLogger(__name__)
1414

@@ -167,7 +167,7 @@ def check_overwrite_dir(path, force):
167167
def get_compressor(cname):
168168
if cname is None:
169169
return None
170-
config = vcf.ICF_DEFAULT_COMPRESSOR.get_config()
170+
config = icf.ICF_DEFAULT_COMPRESSOR.get_config()
171171
config["cname"] = cname
172172
return numcodecs.get_codec(config)
173173

@@ -198,7 +198,7 @@ def explode(
198198
"""
199199
setup_logging(verbose)
200200
check_overwrite_dir(icf_path, force)
201-
vcf.explode(
201+
icf.explode(
202202
icf_path,
203203
vcfs,
204204
worker_processes=worker_processes,
@@ -235,7 +235,7 @@ def dexplode_init(
235235
"""
236236
setup_logging(verbose)
237237
check_overwrite_dir(icf_path, force)
238-
work_summary = vcf.explode_init(
238+
work_summary = icf.explode_init(
239239
icf_path,
240240
vcfs,
241241
target_num_partitions=num_partitions,
@@ -263,7 +263,7 @@ def dexplode_partition(icf_path, partition, verbose, one_based):
263263
setup_logging(verbose)
264264
if one_based:
265265
partition -= 1
266-
vcf.explode_partition(icf_path, partition)
266+
icf.explode_partition(icf_path, partition)
267267

268268

269269
@click.command
@@ -274,7 +274,7 @@ def dexplode_finalise(icf_path, verbose):
274274
Final step for distributed conversion of VCF(s) to intermediate columnar format.
275275
"""
276276
setup_logging(verbose)
277-
vcf.explode_finalise(icf_path)
277+
icf.explode_finalise(icf_path)
278278

279279

280280
@click.command

bio2zarr/constants.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
import numpy as np
2+
3+
INT_MISSING = -1
4+
INT_FILL = -2
5+
STR_MISSING = "."
6+
STR_FILL = ""
7+
8+
FLOAT32_MISSING, FLOAT32_FILL = np.array([0x7F800001, 0x7F800002], dtype=np.int32).view(
9+
np.float32
10+
)
11+
FLOAT32_MISSING_AS_INT32, FLOAT32_FILL_AS_INT32 = np.array(
12+
[0x7F800001, 0x7F800002], dtype=np.int32
13+
)
14+
15+
16+
MIN_INT_VALUE = np.iinfo(np.int32).min + 2
17+
VCF_INT_MISSING = np.iinfo(np.int32).min
18+
VCF_INT_FILL = np.iinfo(np.int32).min + 1

bio2zarr/core.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
11
import concurrent.futures as cf
22
import contextlib
33
import dataclasses
4+
import json
45
import logging
6+
import math
57
import multiprocessing
68
import os
79
import os.path
810
import threading
911
import time
1012

13+
import humanfriendly
1114
import numcodecs
1215
import numpy as np
1316
import tqdm
@@ -18,6 +21,17 @@
1821
numcodecs.blosc.use_threads = False
1922

2023

24+
def display_number(x):
25+
ret = "n/a"
26+
if math.isfinite(x):
27+
ret = f"{x: 0.2g}"
28+
return ret
29+
30+
31+
def display_size(n):
32+
return humanfriendly.format_size(n, binary=True)
33+
34+
2135
def min_int_dtype(min_value, max_value):
2236
if min_value > max_value:
2337
raise ValueError("min_value must be <= max_value")
@@ -277,3 +291,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
277291
self._update_progress()
278292
self.progress_bar.close()
279293
return False
294+
295+
296+
class JsonDataclass:
297+
def asdict(self):
298+
return dataclasses.asdict(self)
299+
300+
def asjson(self):
301+
return json.dumps(self.asdict(), indent=4)

0 commit comments

Comments
 (0)