Skip to content

Commit 9df757b

Browse files
committed
Added FORMAT column and per-sample value merging to compare_and_merge_lines
1 parent e00bea3 commit 9df757b

File tree

1 file changed

+46
-9
lines changed

1 file changed

+46
-9
lines changed

convert_gvf_to_vcf/convertGVFtoVCF.py

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ def compare_and_merge_lines(list_of_formatted_vcf_datalines, headerline):
319319
and previous_data["POS"] == current_data["POS"]
320320
and previous_data["REF"] == current_data["REF"]
321321
):
322-
print("True - merge")
322+
# print("True - merge")
323323
merged_data["#CHROM"] = current_data["#CHROM"]
324324
merged_data["POS"] = current_data["POS"]
325325
merged_data["ID"] = merge_and_add(previous_data["ID"], current_data["ID"], ";")
@@ -356,17 +356,54 @@ def compare_and_merge_lines(list_of_formatted_vcf_datalines, headerline):
356356
parts_of_info_string.append(part_string)
357357
merged_info_string = ';'.join(parts_of_info_string)
358358
merged_data["INFO"] = merged_info_string
359+
# FORMAT
360+
previous_format_key_tokens = previous_data["FORMAT"].split(":")
361+
current_format_key_tokens = current_data["FORMAT"].split(":")
362+
merged_format_key_tokens = []
363+
for format_key in previous_format_key_tokens + current_format_key_tokens:
364+
if format_key not in merged_format_key_tokens:
365+
merged_format_key_tokens.append(format_key)
366+
367+
merged_format_key_string = ':'.join(merged_format_key_tokens)
368+
merged_data["FORMAT"] = merged_format_key_string
369+
# sample values.
370+
sample_names = header_fields[9:]
371+
merged_sample_format = {}
372+
for sample_name in sample_names:
373+
previous_sample_format_value = dict(zip(previous_data["FORMAT"].split(":"),previous_data[sample_name].split(":")))
374+
current_sample_format_value = dict(zip(current_data["FORMAT"].split(":"),current_data[sample_name].split(":")))
375+
smallest_format_dict, biggest_format_dict = get_bigger_dictionary(previous_sample_format_value, current_sample_format_value)
376+
for k in biggest_format_dict:
377+
biggest_value = biggest_format_dict.get(k)
378+
smallest_value = smallest_format_dict.get(k)
379+
380+
if biggest_value is None or biggest_value == ".":
381+
biggest_value = ""
382+
if smallest_value is None or smallest_value == ".":
383+
smallest_value = ""
384+
element = merge_and_add(biggest_value, smallest_value, "")
385+
if element == "":
386+
element = "."
387+
388+
# merged_sample_format[sample_name] = {k: element}
389+
if sample_name not in merged_sample_format:
390+
merged_sample_format[sample_name] = {}
391+
merged_sample_format[sample_name].setdefault(k, []).append(element)
392+
else:
393+
merged_sample_format[sample_name].setdefault(k, []).append(element)
359394

360395

361-
print(previous_data)
362-
print(current_data)
363-
print(merged_data)
364-
print("---")
396+
values = []
397+
for sample_name in sample_names:
398+
for key in merged_sample_format[sample_name]:
399+
values.append(merged_sample_format[sample_name][key])
400+
flat_values = [v2 for v1 in values for v2 in v1 ]
401+
sample_format_string =':'.join(flat_values)
365402

366-
else:
367-
print("False - keep previous")
368-
# print(previous_data)
369-
print("---")
403+
merged_data[sample_name] = sample_format_string
404+
# else:
405+
# print("False - keep previous")
406+
# print("---")
370407

371408

372409

0 commit comments

Comments
 (0)