Skip to content

Commit 4773042

Browse files
khetherintcezard
authored andcommitted
significant restructuring
1 parent d24dde0 commit 4773042

File tree

7 files changed

+568
-562
lines changed

7 files changed

+568
-562
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# this is an assistant converter to help convert gvf attributes
2+
import os
3+
from convert_gvf_to_vcf.utils import read_info_attributes
4+
from convert_gvf_to_vcf.helpers import generate_custom_structured_meta_line
5+
# Paths to useful directories: the folder holding this module, and the
# 'etc' folder located directly inside it.
convert_gvf_to_vcf_folder = os.path.dirname(__file__)
etc_folder = os.path.join(
    convert_gvf_to_vcf_folder,
    'etc',
)
8+
9+
def get_gvf_attributes(column9_of_gvf):
    """Get a dictionary of GVF attributes.

    The attribute column is split on ';' into ``key=value`` segments. Each
    segment is split on its first '=' only, so a value may itself contain '='.
    A value containing ',' is stored as a list of its comma-separated parts;
    any other value is stored as a plain string. Blank segments (for example
    the one left by a trailing ';', or an entirely empty column) are skipped.

    :param column9_of_gvf: column - the final column of the GVF file
    :return: gvf_attribute_dictionary: a dictionary of attribute keys and their values
    :raises ValueError: if a non-blank segment contains no '=' separator
    """
    gvf_attribute_dictionary = {}  # attribute key => value (str, or list of str)
    for attribute in column9_of_gvf.split(";"):
        # A trailing ';' (or an empty column) yields a blank segment; there is
        # nothing to parse in it, so skip it instead of failing to unpack.
        if not attribute.strip():
            continue
        # Split on the first '=' only so that '=' inside the value is kept.
        attribute_key, separator, attribute_value = attribute.partition("=")
        if not separator:
            raise ValueError(
                "GVF attribute {!r} is not in key=value form".format(attribute)
            )
        if "," in attribute_value:
            # Multi-valued attribute: keep every comma-separated part.
            gvf_attribute_dictionary[attribute_key] = attribute_value.split(",")
        else:
            gvf_attribute_dictionary[attribute_key] = attribute_value
    return gvf_attribute_dictionary
26+
27+
class Assistingconverter:
    """Assistant converter that helps convert GVF attributes for VCF output."""

    # GVF attribute keys that are recognised but for which no conversion is
    # performed yet. The trailing notes record the conversion ideas for each.
    # Total_reads (reserved info key, DP), Variant_freq (reserved info tag, AF)
    # and Genotype (GT) are deliberately NOT listed here: they currently fall
    # through to the "review at a later date" branch.
    _RECOGNISED_UNCONVERTED_KEYS = frozenset((
        "sample_name",
        # GVF keys (not dgva specific)
        "ID",
        "Variant_seq",
        "Reference_seq",
        "Dbxref",            # custom info tag + parse and add to id?
        "Variant_reads",     # reserved info/format key, AD/AC
        "Zygosity",          # format and GT tag
        "Phased",            # GT or FORMAT PS
        "Start_range",       # either custom info tag or CIPOS or CIEND, may need imprecise
        "End_range",         # either custom info tag or CIEND, may need imprecise
        "Breakpoint_range",  # either custom info tag or CIPOS, CIEND, may need imprecise
        "Individual",        # sample name for each column
    ))

    @staticmethod
    def convert_gvf_attributes_to_vcf_values(column9_of_gvf,
                                             info_attribute_dict,
                                             field_lines_dictionary,
                                             all_possible_lines_dictionary):
        """Register VCF header lines for the attributes found on one GVF line.

        The attribute column is parsed with ``get_gvf_attributes`` and each key
        is handled by the first matching rule:

        1. Key present in ``info_attribute_dict``: a custom INFO meta line is
           built with ``generate_custom_structured_meta_line`` (elements 1, 2
           and 3 of the entry are passed as number, type and description) and
           appended to ``field_lines_dictionary["INFO"]``. The value is also
           stored in a local ``vcf_vals`` dictionary.
        2. Key present in the mapping read from ``etc/attribute_mapper.tsv``:
           elements 1 and 2 of the mapping entry select a line from
           ``all_possible_lines_dictionary``, which is appended to the matching
           list in ``field_lines_dictionary``.
        3. Key is one of the recognised-but-unconverted keys: nothing is done.
        4. Any other key: it is printed and collected for later review.

        :param column9_of_gvf: the attribute column of a GVF line
        :param info_attribute_dict: attribute key => entry describing a custom INFO tag
        :param field_lines_dictionary: field name => list of header lines; modified in place
        :param all_possible_lines_dictionary: field name => {key => header line}
        :return: the parsed GVF attribute dictionary (``vcf_vals`` and the
                 review list are local only and are not returned)
        """
        gvf_attribute_dictionary = get_gvf_attributes(column9_of_gvf)
        vcf_vals = {}
        catching_for_review = []
        mapper_path = os.path.join(etc_folder, 'attribute_mapper.tsv')
        mapping_attribute_dict = read_info_attributes(mapper_path)
        # created a rough guide to attributes_for_custom_structured_metainformation
        # in INFOattributes.tsv = this probably should be refined at a later date
        # TODO: edit INFOattributes.tsv i.e. replace unknown placeholders '.' with
        #  the actual answer, provide a more informative description
        for attrib_key, attrib_value in gvf_attribute_dictionary.items():
            if attrib_key in info_attribute_dict:
                # Key described in info_attribute_dict: create the custom INFO
                # tag's meta information line.
                info_lines = field_lines_dictionary["INFO"]
                info_entry = info_attribute_dict[attrib_key]
                meta_line = generate_custom_structured_meta_line(
                    vcf_key="INFO",
                    vcf_key_id=attrib_key,
                    vcf_key_number=info_entry[1],
                    vcf_key_type=info_entry[2],
                    vcf_key_description=info_entry[3],
                    optional_extra_fields=None,
                )
                info_lines.append(meta_line)
                vcf_vals[attrib_key] = attrib_value
                continue

            if attrib_key in mapping_attribute_dict:
                # Key with a predefined header line: look the line up by the
                # field and key named in the mapping entry.
                field = mapping_attribute_dict[attrib_key][1]
                key_for_field = mapping_attribute_dict[attrib_key][2]
                target_lines = field_lines_dictionary[field]
                target_lines.append(all_possible_lines_dictionary[field][key_for_field])
                continue

            if attrib_key in Assistingconverter._RECOGNISED_UNCONVERTED_KEYS:
                # Known key, no conversion implemented yet.
                continue

            print("catching these attribute keys for review at a later date", attrib_key)
            catching_for_review.append(attrib_key)
        return gvf_attribute_dictionary

0 commit comments

Comments
 (0)