@@ -29,7 +29,7 @@ def normalise_info_missingness(info_dict, key):
29
29
return value
30
30
31
31
32
- def _get_headers (vcf , header_type ):
32
+ def _get_header_field_dicts (vcf , header_type ):
33
33
def to_dict (header_field ):
34
34
d = header_field .info (extra = True )
35
35
del d [b"IDX" ] # remove IDX since we don't care about ordering
@@ -47,25 +47,44 @@ def to_dict(header_field):
47
47
}
48
48
49
49
50
+ def _assert_header_field_dicts_equivalent (field_dicts1 , field_dicts2 ):
51
+ assert len (field_dicts1 ) == len (field_dicts2 )
52
+
53
+ for id in field_dicts1 .keys ():
54
+ assert id in field_dicts2
55
+ field_dict1 = field_dicts1 [id ]
56
+ field_dict2 = field_dicts2 [id ]
57
+
58
+ assert len (field_dict1 ) == len (field_dict2 )
59
+ # all fields should be the same, except Number="." which can match any value
60
+ for k in field_dict1 .keys ():
61
+ assert k in field_dict2
62
+ v1 = field_dict1 [k ]
63
+ v2 = field_dict2 [k ]
64
+ if k == "Number" and (v1 == "." or v2 == "." ):
65
+ continue
66
+ assert v1 == v2 , f"Failed in field { id } with key { k } "
67
+
68
+
50
69
def _assert_vcf_headers_equivalent(vcf1, vcf2):
    """Assert that the headers of two VCF objects are equivalent.

    Only the INFO, FORMAT, FILTER and CONTIG header fields are compared,
    ignoring order; every other header field is ignored. For each of these
    header types the field dicts of both VCFs are extracted with
    `_get_header_field_dicts` and checked with
    `_assert_header_field_dicts_equivalent`.
    """
    compared_header_types = ("INFO", "FORMAT", "FILTER", "CONTIG")

    for header_type in compared_header_types:
        fields1 = _get_header_field_dicts(vcf1, header_type)
        fields2 = _get_header_field_dicts(vcf2, header_type)
        _assert_header_field_dicts_equivalent(fields1, fields2)
69
88
70
89
71
90
def assert_vcfs_close (f1 , f2 , * , rtol = 1e-05 , atol = 1e-03 , allow_zero_variants = False ):
0 commit comments