Skip to content

Commit e093c43

Browse files
Basic documentation and polish
1 parent 6eb33a7 commit e093c43

File tree

4 files changed

+22
-9
lines changed

4 files changed

+22
-9
lines changed

bio2zarr/cli.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,17 @@ def dencode_init(
332332
verbose,
333333
):
334334
"""
335-
TODO DOCUMENT
335+
Initialise conversion of intermediate format to VCF Zarr. This will
336+
set up the specified ZARR_PATH to perform this conversion over
337+
NUM_PARTITIONS.
338+
339+
The output of this command is the actual number of partitions generated
340+
(which may be less than the requested number, if there are not sufficient
341+
chunks in the variants dimension) and a rough lower-bound on the amount
342+
of memory required to encode a partition.
343+
344+
NOTE: the format of this output will likely change in subsequent releases;
345+
it should not be considered machine-readable for now.
336346
"""
337347
setup_logging(verbose)
338348
check_overwrite_dir(zarr_path, force)
@@ -363,7 +373,10 @@ def dencode_init(
363373
@verbose
364374
def dencode_partition(zarr_path, partition, verbose):
365375
"""
366-
TODO DOCUMENT
376+
Convert a partition from intermediate columnar format to VCF Zarr.
377+
Must be called *after* the Zarr path has been initialised with dencode_init.
378+
Partition indexes must be from 0 (inclusive) to the number of partitions
379+
returned by dencode_init (exclusive).
367380
"""
368381
setup_logging(verbose)
369382
vcf.encode_partition(zarr_path, partition)
@@ -374,10 +387,10 @@ def dencode_partition(zarr_path, partition, verbose):
374387
@verbose
375388
def dencode_finalise(zarr_path, verbose):
376389
"""
377-
TODO DOCUMENT
390+
Final step for distributed conversion of ICF to VCF Zarr.
378391
"""
379392
setup_logging(verbose)
380-
vcf.encode_finalise(zarr_path)
393+
vcf.encode_finalise(zarr_path, show_progress=True)
381394

382395

383396
@click.command(name="convert")

bio2zarr/vcf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2026,7 +2026,7 @@ def encode_all_partitions(
20262026
):
20272027
max_memory = parse_max_memory(max_memory)
20282028
self.load_metadata()
2029-
num_partitions = len(self.num_partitions)
2029+
num_partitions = self.num_partitions
20302030
per_worker_memory = self.get_max_encoding_memory()
20312031
logger.info(
20322032
f"Encoding Zarr over {num_partitions} partitions with "

tests/test_cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ def test_encode(self, mocked, tmp_path):
396396
**DEFAULT_ENCODE_ARGS,
397397
)
398398

399-
@mock.patch("bio2zarr.vcf.encode_init", return_value=10)
399+
@mock.patch("bio2zarr.vcf.encode_init", return_value=(10, 1024))
400400
def test_dencode_init(self, mocked, tmp_path):
401401
icf_path = tmp_path / "icf"
402402
icf_path.mkdir()
@@ -408,7 +408,7 @@ def test_dencode_init(self, mocked, tmp_path):
408408
catch_exceptions=False,
409409
)
410410
assert result.exit_code == 0
411-
assert result.stdout == "10\n"
411+
assert result.stdout == "10\t1 KiB\n"
412412
assert len(result.stderr) == 0
413413
mocked.assert_called_once_with(
414414
str(icf_path),
@@ -564,7 +564,7 @@ def test_dencode(self, tmp_path):
564564
catch_exceptions=False,
565565
)
566566
assert result.exit_code == 0
567-
assert result.stdout.strip() == "3"
567+
assert result.stdout.split()[0] == "3"
568568

569569
for j in range(3):
570570
result = runner.invoke(

tests/test_vcf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def test_not_enough_memory(self, tmp_path, icf_path, max_memory):
6464
with pytest.raises(ValueError, match="Insufficient memory"):
6565
vcf.encode(icf_path, zarr_path, max_memory=max_memory)
6666

67-
@pytest.mark.parametrize("max_memory", ["20MiB"])
67+
@pytest.mark.parametrize("max_memory", ["150KiB", "200KiB"])
6868
def test_not_enough_memory_for_two(
6969
self, tmp_path, icf_path, zarr_path, caplog, max_memory
7070
):

0 commit comments

Comments
 (0)