1+ # this is an assistant converter to help convert gvf attributes
2+ import os
3+ from convert_gvf_to_vcf .utils import read_info_attributes
4+ from convert_gvf_to_vcf .helpers import generate_custom_structured_meta_line
5+ # setting up paths to useful directories
6+ convert_gvf_to_vcf_folder = os .path .dirname (__file__ )
7+ etc_folder = os .path .join (convert_gvf_to_vcf_folder , 'etc' )
8+
def get_gvf_attributes(column9_of_gvf):
    """Get a dictionary of GVF attributes.

    The attribute column is split on ";" into ``key=value`` pairs. Each pair
    is split on its first "=" only, so a value may itself contain "=".
    A value containing "," is stored as a list of its comma-separated parts;
    any other value is stored as a plain string.

    :param column9_of_gvf: column - the final column of the GVF file
    :return: gvf_attribute_dictionary: a dictionary of attribute keys and their values
    :raises ValueError: if a non-blank attribute contains no "=" separator
    """
    gvf_attribute_dictionary = {}  # attribute key => value
    for attribute in column9_of_gvf.split(";"):
        # A trailing ";" (or an empty column) yields a blank segment; there is
        # nothing to record for it, so skip it instead of failing to unpack.
        if not attribute.strip():
            continue
        # partition() splits on the first "=" only, keeping any later "="
        # characters as part of the value.
        attribute_key, separator, attribute_value = attribute.partition("=")
        if not separator:
            raise ValueError(
                "GVF attribute {!r} is not in key=value form".format(attribute)
            )
        if "," in attribute_value:
            gvf_attribute_dictionary[attribute_key] = attribute_value.split(",")
        else:
            gvf_attribute_dictionary[attribute_key] = attribute_value
    return gvf_attribute_dictionary
26+
class Assistingconverter:
    """Groups the helper that walks GVF attributes while building VCF header lines."""

    @staticmethod
    def convert_gvf_attributes_to_vcf_values(column9_of_gvf,
                                             info_attribute_dict,
                                             field_lines_dictionary,
                                             all_possible_lines_dictionary):
        """Parse a GVF attribute column and record the VCF meta lines it needs.

        Each attribute key is handled by the first rule that matches:

        1. key present in ``info_attribute_dict``: a custom INFO meta line is
           generated and appended to ``field_lines_dictionary["INFO"]``;
        2. key present in the table read from ``etc/attribute_mapper.tsv``:
           the matching pre-built line from ``all_possible_lines_dictionary``
           is appended to the mapped field's list;
        3. key is one of the recognised-but-unconverted GVF keys: nothing is done;
        4. anything else: the key is printed for later review.

        :param column9_of_gvf: the attribute column of a GVF line
        :param info_attribute_dict: lookup keyed by attribute name; positions
            1, 2 and 3 of each entry are passed on as the meta line's number,
            type and description
        :param field_lines_dictionary: dictionary of lists, mutated in place,
            collecting meta lines per VCF field
        :param all_possible_lines_dictionary: nested lookup ``[field][key]``
            giving the line to append for mapped attributes
        :return: the parsed GVF attribute dictionary (key => value or list)
        """
        # GVF / dgva keys that are known but have no conversion yet. The
        # trailing notes record the intended VCF destination of each key.
        # NOTE: Total_reads (DP), Variant_freq (AF) and Genotype (GT) are not
        # listed here yet.
        recognised_but_unconverted = frozenset((
            "sample_name",
            # GVF keys (not dgva specific)
            "ID",
            "Variant_seq",
            "Reference_seq",
            "Dbxref",            # custom info tag + parse and add to id?
            "Variant_reads",     # reserved info/format key, AD/AC
            "Zygosity",          # format and GT tag
            "Phased",            # GT or FORMAT PS
            "Start_range",       # custom info tag or CIPOS/CIEND, may need imprecise
            "End_range",         # custom info tag or CIEND, may need imprecise
            "Breakpoint_range",  # custom info tag or CIPOS, CIEND, may need imprecise
            "Individual",        # sample name for each column
        ))

        gvf_attribute_dictionary = get_gvf_attributes(column9_of_gvf)
        # Collected below but not returned yet.
        vcf_vals = {}
        catching_for_review = []
        mapper_path = os.path.join(etc_folder, 'attribute_mapper.tsv')
        mapping_attribute_dict = read_info_attributes(mapper_path)
        # TODO: edit INFOattributes.tsv i.e. replace unknown placeholders '.'
        # with the actual answer, provide a more informative description

        for attrib_key, attrib_value in gvf_attribute_dictionary.items():
            if attrib_key in info_attribute_dict:
                # dgva specific key: create the custom INFO tag's meta line.
                info_lines = field_lines_dictionary["INFO"]
                meta_line = generate_custom_structured_meta_line(
                    vcf_key="INFO",
                    vcf_key_id=attrib_key,
                    vcf_key_number=info_attribute_dict[attrib_key][1],
                    vcf_key_type=info_attribute_dict[attrib_key][2],
                    vcf_key_description=info_attribute_dict[attrib_key][3],
                    optional_extra_fields=None,
                )
                info_lines.append(meta_line)
                vcf_vals[attrib_key] = attrib_value
                continue

            if attrib_key in mapping_attribute_dict:
                mapped_entry = mapping_attribute_dict[attrib_key]
                field = mapped_entry[1]
                key_for_field = mapped_entry[2]
                field_lines_dictionary[field].append(
                    all_possible_lines_dictionary[field][key_for_field]
                )
                continue

            if attrib_key in recognised_but_unconverted:
                continue

            print("catching these attribute keys for review at a later date", attrib_key)
            catching_for_review.append(attrib_key)

        return gvf_attribute_dictionary
0 commit comments