|
1 | 1 | import json
|
2 | 2 |
|
| 3 | +import numpy.testing as nt |
| 4 | +import pandas as pd |
3 | 5 | import pysam
|
4 | 6 | import pytest
|
5 | 7 | import sgkit as sg
|
@@ -662,3 +664,104 @@ def test_removed_samples(self, tmp_path, schema, icf_path, samples):
|
662 | 664 | json.dump(d, f)
|
663 | 665 | with pytest.raises(ValueError, match="Subsetting or reordering samples"):
|
664 | 666 | vcf2zarr.encode(icf_path, tmp_path / "z", schema_path=schema_path)
|
| 667 | + |
| 668 | + |
class TestInspect:
    """Checks on the summary records returned by ``vcz_mod.inspect``."""

    def test_icf(self, icf_path):
        """Inspecting ``icf_path`` yields the expected columns and field names."""
        expected_columns = [
            "name",
            "type",
            "chunks",
            "size",
            "compressed",
            "max_n",
            "min_val",
            "max_val",
        ]
        expected_names = [
            "CHROM",
            "POS",
            "QUAL",
            "ID",
            "FILTERS",
            "REF",
            "ALT",
            "rlen",
            "INFO/NS",
            "INFO/AN",
            "INFO/AC",
            "INFO/DP",
            "INFO/AF",
            "INFO/AA",
            "INFO/DB",
            "INFO/H2",
            "FORMAT/GT",
            "FORMAT/GQ",
            "FORMAT/DP",
            "FORMAT/HQ",
        ]
        summary = pd.DataFrame(vcz_mod.inspect(icf_path))
        # Order is not part of the contract being checked, so compare sorted.
        assert sorted(summary.columns) == sorted(expected_columns)
        nt.assert_array_equal(
            sorted(summary["name"].values),
            sorted(expected_names),
        )

    def test_vcz(self, zarr_path):
        """Inspecting ``zarr_path`` yields the expected columns and array names."""
        expected_columns = [
            "name",
            "dtype",
            "stored",
            "size",
            "ratio",
            "nchunks",
            "chunk_size",
            "avg_chunk_stored",
            "shape",
            "chunk_shape",
            "compressor",
            "filters",
        ]
        expected_names = [
            "/call_genotype",
            "/call_HQ",
            "/call_genotype_mask",
            "/call_GQ",
            "/call_DP",
            "/call_genotype_phased",
            "/variant_allele",
            "/variant_AC",
            "/variant_AF",
            "/region_index",
            "/variant_filter",
            "/variant_id",
            "/variant_contig",
            "/variant_AA",
            "/variant_quality",
            "/variant_position",
            "/variant_AN",
            "/variant_length",
            "/variant_NS",
            "/variant_DB",
            "/variant_DP",
            "/variant_H2",
            "/sample_id",
            "/variant_id_mask",
            "/filter_id",
            "/contig_id",
        ]
        summary = pd.DataFrame(vcz_mod.inspect(zarr_path))
        # Order is not part of the contract being checked, so compare sorted.
        assert sorted(summary.columns) == sorted(expected_columns)
        nt.assert_array_equal(sorted(summary["name"]), sorted(expected_names))

    @pytest.mark.parametrize("bad_path", ["/NO_WAY", "TTTTTT"])
    def test_no_such_path(self, bad_path):
        """A path that does not exist is rejected with a ``ValueError``."""
        expected_message = f"Path not found: {bad_path}"
        with pytest.raises(ValueError, match=expected_message):
            vcz_mod.inspect(bad_path)

    @pytest.mark.parametrize("path", ["./", "tests/data/vcf/sample.vcf.gz"])
    def test_unknown_format(self, path):
        """An existing path in neither supported format raises ``ValueError``."""
        expected_message = "not in ICF or VCF Zarr format"
        with pytest.raises(ValueError, match=expected_message):
            vcz_mod.inspect(path)
0 commit comments