Skip to content

Commit 687c715

Browse files
Some basic testing for dencode, plus tidying
1 parent bf7d56b commit 687c715

File tree

3 files changed

+53
-19
lines changed

3 files changed

+53
-19
lines changed

bio2zarr/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,7 @@ def dencode_partition(zarr_path, partition, verbose):
357357
TODO DOCUMENT
358358
"""
359359
setup_logging(verbose)
360-
vcf.encode_partition(zarr_path, partition, show_progress=False)
360+
vcf.encode_partition(zarr_path, partition)
361361

362362

363363
@click.command

bio2zarr/vcf.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1540,15 +1540,6 @@ def summary_table(self):
15401540
return data
15411541

15421542

1543-
@dataclasses.dataclass
1544-
class EncodingWork:
1545-
func: callable = dataclasses.field(repr=False)
1546-
start: int
1547-
stop: int
1548-
columns: list[str]
1549-
memory: int = 0
1550-
1551-
15521543
def parse_max_memory(max_memory):
15531544
if max_memory is None:
15541545
# Effectively unbounded
@@ -1640,7 +1631,7 @@ def init(
16401631
):
16411632
self.icf = icf
16421633
if self.path.exists():
1643-
raise ValueError("Zarr path already exists")
1634+
raise ValueError("Zarr path already exists") # NEEDS TEST
16441635
partitions = VcfZarrPartition.generate_partitions(
16451636
self.icf.num_records,
16461637
schema.variants_chunk_size,
@@ -1807,6 +1798,7 @@ def finalise_partition_array(self, partition_index, name):
18071798
wip_path = self.wip_partition_array_path(partition_index, name)
18081799
final_path = self.partition_array_path(partition_index, name)
18091800
if final_path.exists():
1801+
# NEEDS TEST
18101802
logger.warning(f"Removing existing {final_path}")
18111803
shutil.rmtree(final_path)
18121804
# Atomic swap
@@ -1923,7 +1915,7 @@ def encode_filters_partition(self, partition_index):
19231915
var_filter.buff[j, lookup[f]] = True
19241916
except KeyError:
19251917
raise ValueError(
1926-
f"Filter '{f}' was not defined " f"in the header."
1918+
f"Filter '{f}' was not defined in the header."
19271919
) from None
19281920
var_filter.flush()
19291921

@@ -1956,6 +1948,7 @@ def finalise_array(self, name):
19561948
logger.info(f"Finalising {name}")
19571949
final_path = self.path / name
19581950
if final_path.exists():
1951+
# NEEDS TEST
19591952
raise ValueError(f"Array {name} already exists")
19601953
for partition in range(len(self.metadata.partitions)):
19611954
# Move all the files in partition dir to dest dir
@@ -1992,7 +1985,12 @@ def finalise(self, show_progress=False):
19921985
# NOTE: it's not clear that adding more workers will make this quicker,
19931986
# as it's just going to be causing contention on the file system.
19941987
# Something to check empirically in some deployments.
1995-
with core.ParallelWorkManager(1, progress_config) as pwm:
1988+
# FIXME we're just using worker_processes=0 here to hook into the
1989+
# SynchronousExecutor which is intended for testing purposes so
1990+
# that we get test coverage. Should fix this either by allowing
1991+
# for multiple workers, or making a standard wrapper for tqdm
1992+
# that allows us to have a consistent look and feel.
1993+
with core.ParallelWorkManager(0, progress_config) as pwm:
19961994
for name in self.metadata.schema.columns:
19971995
pwm.submit(self.finalise_array, name)
19981996
zarr.consolidate_metadata(self.path)
@@ -2131,11 +2129,9 @@ def encode_init(
21312129
)
21322130

21332131

2134-
def encode_partition(zarr_path, partition, *, show_progress=False, worker_processes=1):
2132+
def encode_partition(zarr_path, partition):
21352133
writer = VcfZarrWriter(zarr_path)
2136-
writer.encode_partition(
2137-
partition, show_progress=show_progress, worker_processes=worker_processes
2138-
)
2134+
writer.encode_partition(partition)
21392135

21402136

21412137
def encode_finalise(zarr_path, show_progress=False):

tests/test_cli.py

Lines changed: 40 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@
4141
show_progress=True,
4242
)
4343

44+
DEFAULT_DENCODE_PARTITION_ARGS = dict()
45+
4446

4547
class TestWithMocks:
4648
vcf_path = "tests/data/vcf/sample.vcf.gz"
@@ -395,7 +397,7 @@ def test_encode(self, mocked, tmp_path):
395397
)
396398

397399
@mock.patch("bio2zarr.vcf.encode_init", return_value=10)
398-
def test_dencode(self, mocked, tmp_path):
400+
def test_dencode_init(self, mocked, tmp_path):
399401
icf_path = tmp_path / "icf"
400402
icf_path.mkdir()
401403
zarr_path = tmp_path / "zarr"
@@ -429,7 +431,7 @@ def test_vcf_dencode_partition(self, mocked, tmp_path):
429431
assert len(result.stdout) == 0
430432
assert len(result.stderr) == 0
431433
mocked.assert_called_once_with(
432-
str(zarr_path), 1, **DEFAULT_DEXPLODE_PARTITION_ARGS
434+
str(zarr_path), 1, **DEFAULT_DENCODE_PARTITION_ARGS
433435
)
434436

435437
@mock.patch("bio2zarr.vcf.encode_finalise")
@@ -548,6 +550,42 @@ def test_encode(self, tmp_path):
548550
# Arbitrary check
549551
assert "variant_position" in result.stdout
550552

553+
def test_dencode(self, tmp_path):
554+
icf_path = tmp_path / "icf"
555+
zarr_path = tmp_path / "zarr"
556+
runner = ct.CliRunner(mix_stderr=False)
557+
result = runner.invoke(
558+
cli.vcf2zarr, f"explode {self.vcf_path} {icf_path}", catch_exceptions=False
559+
)
560+
assert result.exit_code == 0
561+
result = runner.invoke(
562+
cli.vcf2zarr,
563+
f"dencode-init {icf_path} {zarr_path} 5 --variants-chunk-size=3",
564+
catch_exceptions=False,
565+
)
566+
assert result.exit_code == 0
567+
assert result.stdout.strip() == "3"
568+
569+
for j in range(3):
570+
result = runner.invoke(
571+
cli.vcf2zarr,
572+
f"dencode-partition {zarr_path} {j}",
573+
catch_exceptions=False,
574+
)
575+
assert result.exit_code == 0
576+
577+
result = runner.invoke(
578+
cli.vcf2zarr, f"dencode-finalise {zarr_path}", catch_exceptions=False
579+
)
580+
assert result.exit_code == 0
581+
582+
result = runner.invoke(
583+
cli.vcf2zarr, f"inspect {zarr_path}", catch_exceptions=False
584+
)
585+
assert result.exit_code == 0
586+
# Arbitrary check
587+
assert "variant_position" in result.stdout
588+
551589
def test_convert(self, tmp_path):
552590
zarr_path = tmp_path / "zarr"
553591
runner = ct.CliRunner(mix_stderr=False)

0 commit comments

Comments
 (0)