Skip to content

Commit 0768962

Browse files
Fixup local alleles interface
- Add a `local_alleles` option to the top-level convert function
- Change the signatures so there is a single source of truth for the `local_alleles` default
1 parent d1e3e09 commit 0768962

File tree

5 files changed: +33 -6 lines changed

bio2zarr/cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ def dencode_finalise(zarr_path, verbose, progress):
482482
@verbose
483483
@progress
484484
@worker_processes
485+
@local_alleles
485486
def convert_vcf(
486487
vcfs,
487488
zarr_path,
@@ -491,6 +492,7 @@ def convert_vcf(
491492
verbose,
492493
progress,
493494
worker_processes,
495+
local_alleles,
494496
):
495497
"""
496498
Convert input VCF(s) directly to vcfzarr (not recommended for large files).
@@ -504,6 +506,7 @@ def convert_vcf(
504506
samples_chunk_size=samples_chunk_size,
505507
show_progress=progress,
506508
worker_processes=worker_processes,
509+
local_alleles=local_alleles,
507510
)
508511

509512

bio2zarr/vcf2zarr/icf.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,12 +1083,14 @@ def init(
10831083
target_num_partitions=None,
10841084
show_progress=False,
10851085
compressor=None,
1086-
local_alleles,
1086+
local_alleles=None,
10871087
):
10881088
if self.path.exists():
10891089
raise ValueError(f"ICF path already exists: {self.path}")
10901090
if compressor is None:
10911091
compressor = ICF_DEFAULT_COMPRESSOR
1092+
if local_alleles is None:
1093+
local_alleles = True
10921094
vcfs = [pathlib.Path(vcf) for vcf in vcfs]
10931095
target_num_partitions = max(target_num_partitions, len(vcfs))
10941096

@@ -1310,7 +1312,7 @@ def explode(
13101312
worker_processes=1,
13111313
show_progress=False,
13121314
compressor=None,
1313-
local_alleles=True,
1315+
local_alleles=None,
13141316
):
13151317
writer = IntermediateColumnarFormatWriter(icf_path)
13161318
writer.init(
@@ -1337,7 +1339,7 @@ def explode_init(
13371339
worker_processes=1,
13381340
show_progress=False,
13391341
compressor=None,
1340-
local_alleles=True,
1342+
local_alleles=None,
13411343
):
13421344
writer = IntermediateColumnarFormatWriter(icf_path)
13431345
return writer.init(

bio2zarr/vcf2zarr/vcz.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,7 @@ def convert(
10611061
variants_chunk_size=None,
10621062
samples_chunk_size=None,
10631063
worker_processes=1,
1064+
local_alleles=None,
10641065
show_progress=False,
10651066
icf_path=None,
10661067
):
@@ -1075,6 +1076,7 @@ def convert(
10751076
vcfs,
10761077
worker_processes=worker_processes,
10771078
show_progress=show_progress,
1079+
local_alleles=local_alleles,
10781080
)
10791081
encode(
10801082
icf_path,

tests/test_cli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@
5454
samples_chunk_size=None,
5555
show_progress=True,
5656
worker_processes=1,
57+
local_alleles=True,
58+
)
59+
60+
DEFAULT_PLINK_CONVERT_ARGS = dict(
61+
variants_chunk_size=None,
62+
samples_chunk_size=None,
63+
show_progress=True,
64+
worker_processes=1,
5765
)
5866

5967

@@ -621,7 +629,7 @@ def test_convert_plink(self, mocked, progress, flag):
621629
assert result.exit_code == 0
622630
assert len(result.stdout) == 0
623631
assert len(result.stderr) == 0
624-
args = dict(DEFAULT_CONVERT_ARGS)
632+
args = dict(DEFAULT_PLINK_CONVERT_ARGS)
625633
args["show_progress"] = progress
626634
mocked.assert_called_once_with("in", "out", **args)
627635

tests/test_vcf_examples.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,10 +455,22 @@ def test_call_LAA(self, ds):
455455
class TestTriploidExample:
456456
data_path = "tests/data/vcf/triploid.vcf.gz"
457457

458-
def test_value_error(self, tmp_path_factory):
458+
@pytest.fixture(scope="class")
459+
def ds(self, tmp_path_factory):
460+
out = tmp_path_factory.mktemp("data") / "example.vcf.zarr"
461+
vcf2zarr.convert([self.data_path], out, local_alleles=False)
462+
return sg.load_dataset(out)
463+
464+
def test_error_with_local_alleles(self, tmp_path_factory):
459465
icf_path = tmp_path_factory.mktemp("data") / "triploid.icf"
460466
with pytest.raises(ValueError, match=re.escape("Cannot handle ploidy = 3")):
461-
vcf2zarr.explode(icf_path, [self.data_path], worker_processes=0)
467+
vcf2zarr.explode(
468+
icf_path, [self.data_path], worker_processes=0, local_alleles=True
469+
)
470+
471+
def test_ok_without_local_alleles(self, ds):
472+
nt.assert_array_equal(ds.call_genotype.values, [[[0, 0, 0]]])
473+
nt.assert_array_equal(ds.call_PL.values, [[[0, 0, 0, 0]]])
462474

463475

464476
class Test1000G2020Example:

0 commit comments

Comments
 (0)