|
5 | 5 | import xarray.testing as xt
|
6 | 6 | import zarr
|
7 | 7 |
|
8 |
| -from bio2zarr import vcf, vcf_utils |
| 8 | +from bio2zarr import core, vcf, vcf_utils |
9 | 9 |
|
10 | 10 |
|
11 | 11 | @pytest.fixture(scope="module")
|
@@ -383,3 +383,117 @@ def test_fields(self, schema, field, description):
|
383 | 383 | # ])
|
384 | 384 | # def test_filters(self, schema, filt, description):
|
385 | 385 | # assert schema["filters"][filt]["description"] == description
|
| 386 | + |
| 387 | + |
class TestVcfZarrWriterExample:
    """Tests for the staged encode calls exposed by ``vcf``.

    Every test starts from ``vcf.encode_init`` on the ``icf_path`` fixture
    and then inspects the ``wip`` directory tree and/or drives
    ``vcf.encode_partition`` and ``vcf.encode_finalise``.
    """

    # Entries that test_init_paths expects to find under ``wip/arrays``.
    arrays = (
        "variant_contig",
        "variant_filter",
        "variant_id",
        "variant_AA",
        "variant_AC",
        "variant_AF",
        "variant_AN",
        "variant_DB",
        "variant_DP",
        "variant_H2",
        "variant_NS",
        "variant_position",
        "variant_quality",
        "variant_allele",
        "call_DP",
        "call_GQ",
        "call_genotype",
        "call_genotype_phased",
        "call_genotype_mask",
        "call_HQ",
    )

    def test_init_paths(self, icf_path, tmp_path):
        """``encode_init`` creates the store and its ``wip`` sub-tree."""
        store = tmp_path / "x.zarr"
        assert not store.exists()
        # NOTE(review): the third positional argument (7) is presumably the
        # requested partition count; the call is expected to report 3.
        num_partitions, _ = vcf.encode_init(icf_path, store, 7, variants_chunk_size=3)
        assert num_partitions == 3
        assert store.exists()

        wip_dir = store / "wip"
        assert wip_dir.exists()
        partitions_dir = wip_dir / "partitions"
        assert partitions_dir.exists()
        arrays_dir = wip_dir / "arrays"
        assert arrays_dir.exists()
        for array_name in self.arrays:
            assert (arrays_dir / array_name).exists()

        with open(wip_dir / "metadata.json") as metadata_file:
            metadata = json.load(metadata_file)
        # Basic test
        assert len(metadata["partitions"]) == 3

    def test_finalise_paths(self, icf_path, tmp_path):
        """Encoding every partition, then finalising, removes ``wip``."""
        store = tmp_path / "x.zarr"
        assert not store.exists()
        num_partitions, _ = vcf.encode_init(icf_path, store, 7, variants_chunk_size=3)
        wip_dir = store / "wip"
        assert wip_dir.exists()

        for index in range(num_partitions):
            vcf.encode_partition(store, index)
            assert (wip_dir / "partitions" / f"p{index}").exists()

        vcf.encode_finalise(store)
        assert store.exists()
        assert not wip_dir.exists()

    def test_finalise_no_partitions_fails(self, icf_path, tmp_path):
        """Finalising with nothing encoded reports all three partitions."""
        store = tmp_path / "x.zarr"
        vcf.encode_init(icf_path, store, 3, variants_chunk_size=3)
        expected_message = "Partitions not encoded: \\[0, 1, 2\\]"
        with pytest.raises(FileNotFoundError, match=expected_message):
            vcf.encode_finalise(store)

    @pytest.mark.parametrize("partition", [0, 1, 2])
    def test_finalise_missing_partition_fails(self, icf_path, tmp_path, partition):
        """Finalising with one partition skipped names that partition."""
        store = tmp_path / "x.zarr"
        vcf.encode_init(icf_path, store, 3, variants_chunk_size=3)
        # Encode every partition except the parametrized one, in index order.
        for index in (k for k in range(3) if k != partition):
            vcf.encode_partition(store, index)
        expected_message = f"Partitions not encoded: \\[{partition}\\]"
        with pytest.raises(FileNotFoundError, match=expected_message):
            vcf.encode_finalise(store)

    @pytest.mark.parametrize("partition", [0, 1, 2])
    def test_encode_partition(self, icf_path, tmp_path, partition):
        """``encode_partition`` creates ``wip/partitions/p<partition>``."""
        store = tmp_path / "x.zarr"
        vcf.encode_init(icf_path, store, 3, variants_chunk_size=3)
        target = store / "wip" / "partitions" / f"p{partition}"
        assert not target.exists()
        vcf.encode_partition(store, partition)
        assert target.exists()

    def test_double_encode_partition(self, icf_path, tmp_path, caplog):
        """Re-encoding a partition logs a warning and keeps the same size."""
        partition = 1
        store = tmp_path / "x.zarr"
        vcf.encode_init(icf_path, store, 3, variants_chunk_size=3)
        target = store / "wip" / "partitions" / f"p{partition}"
        assert not target.exists()

        vcf.encode_partition(store, partition)
        assert target.exists()
        first_size = core.du(target)
        assert first_size > 0

        # Only the second encode runs with WARNING-level capture enabled.
        with caplog.at_level("WARNING"):
            vcf.encode_partition(store, partition)
        assert "Removing existing partition at" in caplog.text
        assert target.exists()
        assert core.du(target) == first_size

    @pytest.mark.parametrize("partition", [-1, 3, 100])
    def test_encode_partition_out_of_range(self, icf_path, tmp_path, partition):
        """Indices outside the three initialised partitions raise ValueError."""
        store = tmp_path / "x.zarr"
        vcf.encode_init(icf_path, store, 3, variants_chunk_size=3)
        with pytest.raises(ValueError, match="Partition index must be in the range"):
            vcf.encode_partition(store, partition)
0 commit comments