@@ -319,7 +319,7 @@ def compare_and_merge_lines(list_of_formatted_vcf_datalines, headerline):
319319 and previous_data ["POS" ] == current_data ["POS" ]
320320 and previous_data ["REF" ] == current_data ["REF" ]
321321 ):
322- print ("True - merge" )
322+ # print("True - merge")
323323 merged_data ["#CHROM" ] = current_data ["#CHROM" ]
324324 merged_data ["POS" ] = current_data ["POS" ]
325325 merged_data ["ID" ] = merge_and_add (previous_data ["ID" ], current_data ["ID" ], ";" )
@@ -356,17 +356,54 @@ def compare_and_merge_lines(list_of_formatted_vcf_datalines, headerline):
356356 parts_of_info_string .append (part_string )
357357 merged_info_string = ';' .join (parts_of_info_string )
358358 merged_data ["INFO" ] = merged_info_string
359+ # FORMAT
360+ previous_format_key_tokens = previous_data ["FORMAT" ].split (":" )
361+ current_format_key_tokens = current_data ["FORMAT" ].split (":" )
362+ merged_format_key_tokens = []
363+ for format_key in previous_format_key_tokens + current_format_key_tokens :
364+ if format_key not in merged_format_key_tokens :
365+ merged_format_key_tokens .append (format_key )
366+
367+ merged_format_key_string = ':' .join (merged_format_key_tokens )
368+ merged_data ["FORMAT" ] = merged_format_key_string
369+ # sample values.
370+ sample_names = header_fields [9 :]
371+ merged_sample_format = {}
372+ for sample_name in sample_names :
373+ previous_sample_format_value = dict (zip (previous_data ["FORMAT" ].split (":" ),previous_data [sample_name ].split (":" )))
374+ current_sample_format_value = dict (zip (current_data ["FORMAT" ].split (":" ),current_data [sample_name ].split (":" )))
375+ smallest_format_dict , biggest_format_dict = get_bigger_dictionary (previous_sample_format_value , current_sample_format_value )
376+ for k in biggest_format_dict :
377+ biggest_value = biggest_format_dict .get (k )
378+ smallest_value = smallest_format_dict .get (k )
379+
380+ if biggest_value is None or biggest_value == "." :
381+ biggest_value = ""
382+ if smallest_value is None or smallest_value == "." :
383+ smallest_value = ""
384+ element = merge_and_add (biggest_value , smallest_value , "" )
385+ if element == "" :
386+ element = "."
387+
388+ # merged_sample_format[sample_name] = {k: element}
389+ if sample_name not in merged_sample_format :
390+ merged_sample_format [sample_name ] = {}
391+ merged_sample_format [sample_name ].setdefault (k , []).append (element )
392+ else :
393+ merged_sample_format [sample_name ].setdefault (k , []).append (element )
359394
360395
361- print (previous_data )
362- print (current_data )
363- print (merged_data )
364- print ("---" )
396+ values = []
397+ for sample_name in sample_names :
398+ for key in merged_sample_format [sample_name ]:
399+ values .append (merged_sample_format [sample_name ][key ])
400+ flat_values = [v2 for v1 in values for v2 in v1 ]
401+ sample_format_string = ':' .join (flat_values )
365402
366- else :
367- print ( "False - keep previous" )
368- # print(previous_data )
369- print ("---" )
403+ merged_data [ sample_name ] = sample_format_string
404+ # else:
405+ # print("False - keep previous" )
406+ # print("---")
370407
371408
372409
0 commit comments