Skip to content

Commit e688698

Browse files
Will-Tyler authored and jeromekelleher committed
Add local-alleles option to CLI
1 parent 2bdc35f commit e688698

File tree

4 files changed

+53
-1
lines changed

4 files changed

+53
-1
lines changed

bio2zarr/cli.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,14 @@ def list_commands(self, ctx):
149149
help="An approximate bound on overall memory usage (e.g. 10G),",
150150
)
151151

152+
local_alleles = click.option(
153+
"--local-alleles",
154+
type=bool,
155+
show_default=True,
156+
default=True,
157+
help="Use local allele fields to reduce the storage requirements of the output.",
158+
)
159+
152160

153161
def setup_logging(verbosity):
154162
level = "WARNING"
@@ -214,6 +222,7 @@ def show_work_summary(work_summary, json):
214222
@compressor
215223
@progress
216224
@worker_processes
225+
@local_alleles
217226
def explode(
218227
vcfs,
219228
icf_path,
@@ -223,6 +232,7 @@ def explode(
223232
compressor,
224233
progress,
225234
worker_processes,
235+
local_alleles,
226236
):
227237
"""
228238
Convert VCF(s) to intermediate columnar format
@@ -236,6 +246,7 @@ def explode(
236246
column_chunk_size=column_chunk_size,
237247
compressor=get_compressor(compressor),
238248
show_progress=progress,
249+
local_alleles=local_alleles,
239250
)
240251

241252

@@ -250,6 +261,7 @@ def explode(
250261
@verbose
251262
@progress
252263
@worker_processes
264+
@local_alleles
253265
def dexplode_init(
254266
vcfs,
255267
icf_path,
@@ -261,6 +273,7 @@ def dexplode_init(
261273
verbose,
262274
progress,
263275
worker_processes,
276+
local_alleles,
264277
):
265278
"""
266279
Initial step for distributed conversion of VCF(s) to intermediate columnar format
@@ -277,6 +290,7 @@ def dexplode_init(
277290
worker_processes=worker_processes,
278291
compressor=get_compressor(compressor),
279292
show_progress=progress,
293+
local_alleles=local_alleles,
280294
)
281295
show_work_summary(work_summary, json)
282296

bio2zarr/vcf2zarr/icf.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def make_field_def(name, vcf_type, vcf_number):
216216
return fields
217217

218218

219-
def scan_vcf(path, target_num_partitions):
219+
def scan_vcf(path, target_num_partitions, *, local_alleles):
220220
with vcf_utils.IndexedVcf(path) as indexed_vcf:
221221
vcf = indexed_vcf.vcf
222222
filters = []
@@ -1011,6 +1011,7 @@ def init(
10111011
target_num_partitions=None,
10121012
show_progress=False,
10131013
compressor=None,
1014+
local_alleles,
10141015
):
10151016
if self.path.exists():
10161017
raise ValueError(f"ICF path already exists: {self.path}")
@@ -1025,6 +1026,7 @@ def init(
10251026
worker_processes=worker_processes,
10261027
show_progress=show_progress,
10271028
target_num_partitions=target_num_partitions,
1029+
local_alleles=local_alleles,
10281030
)
10291031
check_field_clobbering(icf_metadata)
10301032
self.metadata = icf_metadata
@@ -1245,6 +1247,7 @@ def explode(
12451247
worker_processes=1,
12461248
show_progress=False,
12471249
compressor=None,
1250+
local_alleles=True,
12481251
):
12491252
writer = IntermediateColumnarFormatWriter(icf_path)
12501253
writer.init(
@@ -1255,6 +1258,7 @@ def explode(
12551258
show_progress=show_progress,
12561259
column_chunk_size=column_chunk_size,
12571260
compressor=compressor,
1261+
local_alleles=local_alleles,
12581262
)
12591263
writer.explode(worker_processes=worker_processes, show_progress=show_progress)
12601264
writer.finalise()
@@ -1270,6 +1274,7 @@ def explode_init(
12701274
worker_processes=1,
12711275
show_progress=False,
12721276
compressor=None,
1277+
local_alleles=True,
12731278
):
12741279
writer = IntermediateColumnarFormatWriter(icf_path)
12751280
return writer.init(
@@ -1279,6 +1284,7 @@ def explode_init(
12791284
show_progress=show_progress,
12801285
column_chunk_size=column_chunk_size,
12811286
compressor=compressor,
1287+
local_alleles=local_alleles,
12821288
)
12831289

12841290

tests/test_cli.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
compressor=None,
1515
worker_processes=1,
1616
show_progress=True,
17+
local_alleles=True,
1718
)
1819

1920
DEFAULT_DEXPLODE_PARTITION_ARGS = dict()
@@ -23,6 +24,7 @@
2324
column_chunk_size=64,
2425
compressor=None,
2526
show_progress=True,
27+
local_alleles=True,
2628
)
2729

2830
DEFAULT_ENCODE_ARGS = dict(
@@ -287,6 +289,24 @@ def test_vcf_explode_missing_and_existing_vcf(self, mocked, tmp_path):
287289
assert "'no_such_file' does not exist" in result.stderr
288290
mocked.assert_not_called()
289291

292+
@pytest.mark.parametrize("local_alleles", [False, True])
293+
@mock.patch("bio2zarr.vcf2zarr.explode")
294+
def test_vcf_explode_local_alleles(self, mocked, tmp_path, local_alleles):
295+
icf_path = tmp_path / "icf"
296+
runner = ct.CliRunner(mix_stderr=False)
297+
result = runner.invoke(
298+
cli.vcf2zarr_main,
299+
f"explode {self.vcf_path} {icf_path}"
300+
f" --local-alleles {str(local_alleles).lower()}",
301+
catch_exceptions=False,
302+
)
303+
assert result.exit_code == 0
304+
assert len(result.stdout) == 0
305+
assert len(result.stderr) == 0
306+
args = dict(DEFAULT_EXPLODE_ARGS)
307+
args["local_alleles"] = local_alleles
308+
mocked.assert_called_once_with(str(icf_path), (self.vcf_path,), **args)
309+
290310
@pytest.mark.parametrize(("progress", "flag"), [(True, "-P"), (False, "-Q")])
291311
@mock.patch("bio2zarr.vcf2zarr.explode_init", return_value=FakeWorkSummary(5))
292312
def test_vcf_dexplode_init(self, mocked, tmp_path, progress, flag):

tests/test_icf.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ class TestSmallExample:
2424

2525
@pytest.fixture(scope="class")
2626
def icf(self, tmp_path_factory):
27+
out = tmp_path_factory.mktemp("data") / "example.exploded"
28+
return vcf2zarr.explode(out, [self.data_path], local_alleles=False)
29+
30+
@pytest.fixture(scope="class")
31+
def icf_local_alleles(self, tmp_path_factory):
2732
out = tmp_path_factory.mktemp("data") / "example.exploded"
2833
return vcf2zarr.explode(out, [self.data_path])
2934

@@ -49,6 +54,13 @@ def test_summary_table(self, icf):
4954
fields = [d["name"] for d in data]
5055
assert tuple(sorted(fields)) == self.fields
5156

57+
def test_summary_table_local_allleles(self, icf_local_alleles):
58+
data = icf_local_alleles.summary_table()
59+
fields = [d["name"] for d in data]
60+
fields.sort()
61+
expected = tuple(sorted((*self.fields, "FORMAT/LAA")))
62+
assert tuple(fields) == expected
63+
5264
def test_inspect(self, icf):
5365
assert icf.summary_table() == vcf2zarr.inspect(icf.path)
5466

0 commit comments

Comments
 (0)