Skip to content

Commit 850a405

Browse files
Merge pull request #66 from jeromekelleher/some-polishing
Some polishing
2 parents bae5b96 + 2c31ba2 commit 850a405

File tree

6 files changed

+214
-142
lines changed

6 files changed

+214
-142
lines changed

bio2zarr/cli.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,21 @@
1515
)
1616

1717
# TODO help text
18-
chunk_length = click.option("-l", "--chunk-length", type=int, default=None)
18+
chunk_length = click.option(
19+
"-l",
20+
"--chunk-length",
21+
type=int,
22+
default=None,
23+
help="Chunk size in the variants dimension",
24+
)
1925

20-
chunk_width = click.option("-w", "--chunk-width", type=int, default=None)
26+
chunk_width = click.option(
27+
"-w",
28+
"--chunk-width",
29+
type=int,
30+
default=None,
31+
help="Chunk size in the samples dimension",
32+
)
2133

2234
version = click.version_option(version=provenance.__version__)
2335

@@ -83,13 +95,30 @@ def mkschema(if_path):
8395
@click.argument("if_path", type=click.Path())
8496
@click.argument("zarr_path", type=click.Path())
8597
@verbose
86-
@click.option("-s", "--schema", default=None)
87-
# TODO: these are mutually exclusive with schema, tell click this
98+
@click.option("-s", "--schema", default=None, type=click.Path(exists=True))
8899
@chunk_length
89100
@chunk_width
101+
@click.option(
102+
"-V",
103+
"--max-variant-chunks",
104+
type=int,
105+
default=None,
106+
help=(
107+
"Truncate the output in the variants dimension to have "
108+
"this number of chunks. Mainly intended to help with "
109+
"schema tuning."
110+
),
111+
)
90112
@worker_processes
91113
def encode(
92-
if_path, zarr_path, verbose, schema, chunk_length, chunk_width, worker_processes
114+
if_path,
115+
zarr_path,
116+
verbose,
117+
schema,
118+
chunk_length,
119+
chunk_width,
120+
max_variant_chunks,
121+
worker_processes,
93122
):
94123
"""
95124
Encode intermediate format (see explode) to vcfzarr
@@ -101,6 +130,7 @@ def encode(
101130
schema,
102131
chunk_length=chunk_length,
103132
chunk_width=chunk_width,
133+
max_v_chunks=max_variant_chunks,
104134
worker_processes=worker_processes,
105135
show_progress=True,
106136
)
@@ -132,6 +162,9 @@ def convert_vcf(vcfs, out_path, chunk_length, chunk_width, verbose, worker_proce
132162
@click.argument("vcfs", nargs=-1, required=True)
133163
@click.argument("out_path", type=click.Path())
134164
def validate(vcfs, out_path):
165+
"""
166+
Development only, do not use. Will be removed before release.
167+
"""
135168
# FIXME! Will silently not look at remaining VCFs
136169
vcf.validate(vcfs[0], out_path, show_progress=True)
137170

@@ -158,7 +191,9 @@ def vcf2zarr():
158191
@verbose
159192
@chunk_length
160193
@chunk_width
161-
def convert_plink(in_path, out_path, verbose, worker_processes, chunk_length, chunk_width):
194+
def convert_plink(
195+
in_path, out_path, verbose, worker_processes, chunk_length, chunk_width
196+
):
162197
"""
163198
In development; DO NOT USE!
164199
"""

bio2zarr/core.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
)
2424

2525

26-
def chunk_aligned_slices(z, n):
26+
def chunk_aligned_slices(z, n, max_chunks=None):
2727
"""
2828
Returns at most n slices in the specified zarr array, aligned
2929
with its chunks
3030
"""
3131
chunk_size = z.chunks[0]
3232
num_chunks = int(np.ceil(z.shape[0] / chunk_size))
33+
if max_chunks is not None:
34+
num_chunks = min(num_chunks, max_chunks)
3335
slices = []
3436
splits = np.array_split(np.arange(num_chunks), min(n, num_chunks))
3537
for split in splits:
@@ -132,7 +134,7 @@ class ProgressConfig:
132134
units: str = ""
133135
title: str = ""
134136
show: bool = False
135-
poll_interval: float = 0.001
137+
poll_interval: float = 0.01
136138

137139

138140
# NOTE: this approach means that we cannot have more than one
@@ -175,7 +177,7 @@ def __init__(self, worker_processes=1, progress_config=None):
175177
self.progress_config = progress_config
176178
self.progress_bar = tqdm.tqdm(
177179
total=progress_config.total,
178-
desc=f"{progress_config.title:>7}",
180+
desc=f"{progress_config.title:>9}",
179181
unit_scale=True,
180182
unit=progress_config.units,
181183
smoothing=0.1,
@@ -186,6 +188,7 @@ def __init__(self, worker_processes=1, progress_config=None):
186188
self.progress_thread = threading.Thread(
187189
target=self._update_progress_worker,
188190
name="progress-update",
191+
daemon=True, # Avoids deadlock on exit in awkward error conditions
189192
)
190193
self.progress_thread.start()
191194

0 commit comments

Comments
 (0)