55import sys
66import os
77
8- sys .path .insert (0 , os .path .dirname (os .path .realpath (__file__ )))
8+ sys .path .insert (0 , os .path .dirname (os .path .realpath (__file__ )))
99
1010from .imputation .impute import Imputation
1111from .imputation .networkx_graph import Graph
1212
1313# Profiler start
14- #pr = cProfile.Profile()
15- #pr.enable()
14+ # pr = cProfile.Profile()
15+ # pr.enable()
1616
17- def run_impute (conf_file = "../conf/minimal-configuration.json" , project_dir_graph = "" , project_dir_in_file = "" ):
1817
19- configuration_file = conf_file
18+ def run_impute (
19+ conf_file = "../conf/minimal-configuration.json" ,
20+ project_dir_graph = "" ,
21+ project_dir_in_file = "" ,
22+ ):
2023
21- #project_dir = ""# "../"
22- #output_dir = "output/"
24+ configuration_file = conf_file
2325
26+ # project_dir = ""# "../"
27+ # output_dir = "output/"
2428
2529 # Read configuration file and load properties
2630 with open (configuration_file ) as f :
2731 json_conf = json .load (f )
2832
2933 graph_files_path = json_conf .get ("graph_files_path" )
30- if graph_files_path [- 1 ] != '/' :
31- graph_files_path += '/'
34+ if graph_files_path [- 1 ] != "/" :
35+ graph_files_path += "/"
3236 output_dir = json_conf .get ("imuptation_out_path" , "output" )
33- if output_dir [- 1 ] != '/' :
34- output_dir += '/'
37+ if output_dir [- 1 ] != "/" :
38+ output_dir += "/"
3539 config = {
36- "planb" : json_conf .get (' planb' , True ),
37- "pops" : json_conf .get (' populations' ),
38- "priority" : json_conf .get (' priority' ),
39- "epsilon" : json_conf .get (' epsilon' , 1e-3 ),
40- "number_of_results" : json_conf .get (' number_of_results' , 1000 ),
41- "number_of_pop_results" : json_conf .get (' number_of_pop_results' , 100 ),
40+ "planb" : json_conf .get (" planb" , True ),
41+ "pops" : json_conf .get (" populations" ),
42+ "priority" : json_conf .get (" priority" ),
43+ "epsilon" : json_conf .get (" epsilon" , 1e-3 ),
44+ "number_of_results" : json_conf .get (" number_of_results" , 1000 ),
45+ "number_of_pop_results" : json_conf .get (" number_of_pop_results" , 100 ),
4246 "output_MUUG" : json_conf .get ("output_MUUG" , True ),
4347 "output_haplotypes" : json_conf .get ("output_haplotypes" , False ),
44- "node_file" : project_dir_graph + graph_files_path + json_conf .get ("node_csv_file" ),
45- "top_links_file" : project_dir_graph + graph_files_path + json_conf .get ("top_links_csv_file" ),
46- "edges_file" : project_dir_graph + graph_files_path + json_conf .get ("edges_csv_file" ),
47- "imputation_input_file" : project_dir_in_file + json_conf .get ("imputation_in_file" ),
48- "imputation_out_umug_freq_file" : output_dir + json_conf .get ("imputation_out_umug_freq_filename" ),
49- "imputation_out_umug_pops_file" : output_dir + json_conf .get ("imputation_out_umug_pops_filename" ),
50- "imputation_out_hap_freq_file" : output_dir + json_conf .get ("imputation_out_hap_freq_filename" ),
51- "imputation_out_hap_pops_file" : output_dir + json_conf .get ("imputation_out_hap_pops_filename" ),
52- "imputation_out_miss_file" : output_dir + json_conf .get ("imputation_out_miss_filename" ),
53- "imputation_out_problem_file" : output_dir + json_conf .get ("imputation_out_problem_filename" ),
48+ "node_file" : project_dir_graph
49+ + graph_files_path
50+ + json_conf .get ("node_csv_file" ),
51+ "top_links_file" : project_dir_graph
52+ + graph_files_path
53+ + json_conf .get ("top_links_csv_file" ),
54+ "edges_file" : project_dir_graph
55+ + graph_files_path
56+ + json_conf .get ("edges_csv_file" ),
57+ "imputation_input_file" : project_dir_in_file
58+ + json_conf .get ("imputation_in_file" ),
59+ "imputation_out_umug_freq_file" : output_dir
60+ + json_conf .get ("imputation_out_umug_freq_filename" ),
61+ "imputation_out_umug_pops_file" : output_dir
62+ + json_conf .get ("imputation_out_umug_pops_filename" ),
63+ "imputation_out_hap_freq_file" : output_dir
64+ + json_conf .get ("imputation_out_hap_freq_filename" ),
65+ "imputation_out_hap_pops_file" : output_dir
66+ + json_conf .get ("imputation_out_hap_pops_filename" ),
67+ "imputation_out_miss_file" : output_dir
68+ + json_conf .get ("imputation_out_miss_filename" ),
69+ "imputation_out_problem_file" : output_dir
70+ + json_conf .get ("imputation_out_problem_filename" ),
5471 "factor_missing_data" : json_conf .get ("factor_missing_data" , 0.01 ),
55- "loci_map" : json_conf .get ("loci_map" , {"A" : 1 , "B" :3 , "C" : 2 , "DQB1" : 4 , "DRB1" : 5 } ),
56- "matrix_planb" : json_conf .get ("Plan_B_Matrix" , [
57- [[1 , 2 , 3 , 4 , 5 ]],
58- [[1 , 2 , 3 ], [4 , 5 ]],
59- [[1 ], [2 , 3 ], [4 , 5 ]],
60- [[1 , 2 , 3 ], [4 ], [5 ]],
61- [[1 ], [2 , 3 ], [4 ], [5 ]],
62- [[1 ], [2 ], [3 ], [4 ], [5 ]]
63- ]),
64- "pops_count_file" : project_dir_graph + json_conf .get ("pops_count_file" ,'' ),
65- "use_pops_count_file" : json_conf .get ("pops_count_file" ,False ),
66- "number_of_options_threshold" : json_conf .get ("number_of_options_threshold" , 100000 ),
67- "max_haplotypes_number_in_phase" : json_conf .get ("max_haplotypes_number_in_phase" ,100 ),
68- "bin_imputation_input_file" : project_dir_in_file + json_conf .get ("bin_imputation_in_file" , "None" ),
72+ "loci_map" : json_conf .get (
73+ "loci_map" , {"A" : 1 , "B" : 3 , "C" : 2 , "DQB1" : 4 , "DRB1" : 5 }
74+ ),
75+ "matrix_planb" : json_conf .get (
76+ "Plan_B_Matrix" ,
77+ [
78+ [[1 , 2 , 3 , 4 , 5 ]],
79+ [[1 , 2 , 3 ], [4 , 5 ]],
80+ [[1 ], [2 , 3 ], [4 , 5 ]],
81+ [[1 , 2 , 3 ], [4 ], [5 ]],
82+ [[1 ], [2 , 3 ], [4 ], [5 ]],
83+ [[1 ], [2 ], [3 ], [4 ], [5 ]],
84+ ],
85+ ),
86+ "pops_count_file" : project_dir_graph + json_conf .get ("pops_count_file" , "" ),
87+ "use_pops_count_file" : json_conf .get ("pops_count_file" , False ),
88+ "number_of_options_threshold" : json_conf .get (
89+ "number_of_options_threshold" , 100000
90+ ),
91+ "max_haplotypes_number_in_phase" : json_conf .get (
92+ "max_haplotypes_number_in_phase" , 100
93+ ),
94+ "bin_imputation_input_file" : project_dir_in_file
95+ + json_conf .get ("bin_imputation_in_file" , "None" ),
6996 "nodes_for_plan_A" : json_conf .get ("Plan_A_Matrix" , []),
7097 "save_mode" : json_conf .get ("save_space_mode" , False ),
71- "UNK_priors" : json_conf .get ("UNK_priors" , "MR" )
72-
98+ "UNK_priors" : json_conf .get ("UNK_priors" , "MR" ),
7399 }
74100
75101 # Display the configurations we are using
76- print ('****************************************************************************************************' )
102+ print (
103+ "****************************************************************************************************"
104+ )
77105 print ("Performing imputation based on:" )
78106 print ("\t Population: {}" .format (config ["pops" ]))
79107 print ("\t Priority: {}" .format (config ["priority" ]))
@@ -86,34 +114,57 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra
86114 print ("\t Top Links File: {}" .format (config ["edges_file" ]))
87115 print ("\t Input File: {}" .format (config ["imputation_input_file" ]))
88116 print ("\t Output UMUG Format: {}" .format (config ["output_MUUG" ]))
89- print ("\t Output UMUG Freq Filename: {}" .format (config ["imputation_out_umug_freq_file" ]))
90- print ("\t Output UMUG Pops Filename: {}" .format (config ["imputation_out_umug_pops_file" ]))
117+ print (
118+ "\t Output UMUG Freq Filename: {}" .format (
119+ config ["imputation_out_umug_freq_file" ]
120+ )
121+ )
122+ print (
123+ "\t Output UMUG Pops Filename: {}" .format (
124+ config ["imputation_out_umug_pops_file" ]
125+ )
126+ )
91127 print ("\t Output Haplotype Format: {}" .format (config ["output_haplotypes" ]))
92- print ("\t Output HAP Freq Filename: {}" .format (config ["imputation_out_hap_freq_file" ]))
93- print ("\t Output HAP Pops Filename: {}" .format (config ["imputation_out_hap_pops_file" ]))
128+ print (
129+ "\t Output HAP Freq Filename: {}" .format (config ["imputation_out_hap_freq_file" ])
130+ )
131+ print (
132+ "\t Output HAP Pops Filename: {}" .format (config ["imputation_out_hap_pops_file" ])
133+ )
94134 print ("\t Output Miss Filename: {}" .format (config ["imputation_out_miss_file" ]))
95135 print ("\t Output Problem Filename: {}" .format (config ["imputation_out_problem_file" ]))
96136 print ("\t Factor Missing Data: {}" .format (config ["factor_missing_data" ]))
97137 print ("\t Loci Map: {}" .format (config ["loci_map" ]))
98138 print ("\t Plan B Matrix: {}" .format (config ["matrix_planb" ]))
99139 print ("\t Pops Count File: {}" .format (config ["pops_count_file" ]))
100140 print ("\t Use Pops Count File: {}" .format (config ["use_pops_count_file" ]))
101- print ("\t Number of Options Threshold: {}" .format (config ["number_of_options_threshold" ]))
102- print ("\t Max Number of haplotypes in phase: {}" .format (config ["max_haplotypes_number_in_phase" ]))
141+ print (
142+ "\t Number of Options Threshold: {}" .format (
143+ config ["number_of_options_threshold" ]
144+ )
145+ )
146+ print (
147+ "\t Max Number of haplotypes in phase: {}" .format (
148+ config ["max_haplotypes_number_in_phase" ]
149+ )
150+ )
103151 if config ["nodes_for_plan_A" ]:
104152 print ("\t Nodes in plan A: {}" .format (config ["nodes_for_plan_A" ]))
105153 print ("\t Save space mode: {}" .format (config ["save_mode" ]))
106- print ('****************************************************************************************************' )
107-
154+ print (
155+ "****************************************************************************************************"
156+ )
108157
109158 all_loci_set = set ()
110159 for _ , val in config ["loci_map" ].items ():
111160 all_loci_set .add (str (val ))
112161
113- config ["full_loci" ] = '' .join (sorted (all_loci_set ))
162+ config ["full_loci" ] = "" .join (sorted (all_loci_set ))
114163 # Perform imputation
115164 graph = Graph (config )
116- graph .build_graph (config ["node_file" ], config ["top_links_file" ], config ["edges_file" ])
165+ graph .build_graph (
166+ config ["node_file" ], config ["top_links_file" ], config ["edges_file" ]
167+ )
117168 imputation = Imputation (graph , config )
118169
119170 # Create output directory if it doesn't exist
@@ -123,5 +174,5 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra
123174 imputation .impute_file (config )
124175
125176 # Profiler end
126- #pr.disable()
127- #pr.print_stats(sort="time")
177+ # pr.disable()
178+ # pr.print_stats(sort="time")
0 commit comments