|
| 1 | +import argparse |
| 2 | +import cProfile |
| 3 | +import json |
| 4 | +import pathlib |
| 5 | +import sys |
| 6 | +import os |
| 7 | + |
| 8 | +sys.path.insert(0, os.path.dirname(os.path.realpath(__file__))) |
| 9 | + |
| 10 | +from .imputation.impute import Imputation |
| 11 | +from .imputation.networkx_graph import Graph |
| 12 | + |
| 13 | +# Profiler start |
| 14 | +#pr = cProfile.Profile() |
| 15 | +#pr.enable() |
| 16 | + |
def _with_trailing_slash(path):
    """Return *path* terminated by '/'; an empty path is returned unchanged."""
    # An empty prefix must stay empty: appending '/' would turn every
    # relative file name built from it into an absolute path.
    if path and not path.endswith('/'):
        return path + '/'
    return path


def _load_config(conf_file, project_dir_graph, project_dir_in_file):
    """Read the JSON configuration and build the imputation config dict.

    Returns a ``(config, output_dir)`` tuple; ``output_dir`` always ends
    with '/'.
    """
    with open(conf_file) as f:
        json_conf = json.load(f)

    # A missing/null/empty "graph_files_path" is treated as "no
    # sub-directory" instead of crashing on an index into None or ''.
    graph_dir = project_dir_graph + _with_trailing_slash(
        json_conf.get("graph_files_path") or ""
    )

    # "imuptation_out_path" is the historical (misspelled) key and keeps
    # precedence so existing configuration files behave exactly as before;
    # the correctly spelled key is accepted as a fallback.
    output_dir = _with_trailing_slash(
        json_conf.get("imuptation_out_path")
        or json_conf.get("imputation_out_path")
        or "output"
    )

    config = {
        "planb": json_conf.get('planb', True),
        "pops": json_conf.get('populations'),
        "priority": json_conf.get('priority'),
        "epsilon": json_conf.get('epsilon', 1e-3),
        "number_of_results": json_conf.get('number_of_results', 1000),
        "number_of_pop_results": json_conf.get('number_of_pop_results', 100),
        "output_MUUG": json_conf.get("output_MUUG", True),
        "output_haplotypes": json_conf.get("output_haplotypes", False),
        "node_file": graph_dir + json_conf.get("node_csv_file"),
        "top_links_file": graph_dir + json_conf.get("top_links_csv_file"),
        "edges_file": graph_dir + json_conf.get("edges_csv_file"),
        "imputation_input_file": project_dir_in_file + json_conf.get("imputation_in_file"),
        "imputation_out_umug_freq_file": output_dir + json_conf.get("imputation_out_umug_freq_filename"),
        "imputation_out_umug_pops_file": output_dir + json_conf.get("imputation_out_umug_pops_filename"),
        "imputation_out_hap_freq_file": output_dir + json_conf.get("imputation_out_hap_freq_filename"),
        "imputation_out_hap_pops_file": output_dir + json_conf.get("imputation_out_hap_pops_filename"),
        "imputation_out_miss_file": output_dir + json_conf.get("imputation_out_miss_filename"),
        "imputation_out_problem_file": output_dir + json_conf.get("imputation_out_problem_filename"),
        "factor_missing_data": json_conf.get("factor_missing_data", 0.01),
        "loci_map": json_conf.get("loci_map", {"A": 1, "B": 3, "C": 2, "DQB1": 4, "DRB1": 5}),
        "matrix_planb": json_conf.get("Plan_B_Matrix", [
            [[1, 2, 3, 4, 5]],
            [[1, 2, 3], [4, 5]],
            [[1], [2, 3], [4, 5]],
            [[1, 2, 3], [4], [5]],
            [[1], [2, 3], [4], [5]],
            [[1], [2], [3], [4], [5]]
        ]),
        "pops_count_file": project_dir_graph + json_conf.get("pops_count_file", ''),
        # Holds the configured file name itself (or False when absent), so
        # it acts as a truthiness flag rather than a strict boolean.
        "use_pops_count_file": json_conf.get("pops_count_file", False),
        "number_of_options_threshold": json_conf.get("number_of_options_threshold", 100000),
        "max_haplotypes_number_in_phase": json_conf.get("max_haplotypes_number_in_phase", 100),
        # When "bin_imputation_in_file" is absent the literal string "None"
        # is appended to the input directory prefix.
        "bin_imputation_input_file": project_dir_in_file + json_conf.get("bin_imputation_in_file", "None"),
        "nodes_for_plan_A": json_conf.get("Plan_A_Matrix", []),
        "save_mode": json_conf.get("save_space_mode", False),
        "UNK_priors": json_conf.get("UNK_priors", "MR"),
    }

    # Concatenation of the distinct locus codes (as strings) in sorted order.
    config["full_loci"] = ''.join(sorted({str(val) for val in config["loci_map"].values()}))
    return config, output_dir


def _print_config(config):
    """Print a human-readable summary of the effective configuration."""
    banner = '****************************************************************************************************'
    rows = [
        ("Population", "pops"),
        ("Priority", "priority"),
        ("UNK priority", "UNK_priors"),
        ("Epsilon", "epsilon"),
        ("Plan B", "planb"),
        ("Number of Results", "number_of_results"),
        ("Number of Population Results", "number_of_pop_results"),
        ("Nodes File", "node_file"),
        # Previously this label displayed the edges file by mistake and the
        # top-links path was never shown; both are now reported.
        ("Top Links File", "top_links_file"),
        ("Edges File", "edges_file"),
        ("Input File", "imputation_input_file"),
        ("Output UMUG Format", "output_MUUG"),
        ("Output UMUG Freq Filename", "imputation_out_umug_freq_file"),
        ("Output UMUG Pops Filename", "imputation_out_umug_pops_file"),
        ("Output Haplotype Format", "output_haplotypes"),
        ("Output HAP Freq Filename", "imputation_out_hap_freq_file"),
        ("Output HAP Pops Filename", "imputation_out_hap_pops_file"),
        ("Output Miss Filename", "imputation_out_miss_file"),
        ("Output Problem Filename", "imputation_out_problem_file"),
        ("Factor Missing Data", "factor_missing_data"),
        ("Loci Map", "loci_map"),
        ("Plan B Matrix", "matrix_planb"),
        ("Pops Count File", "pops_count_file"),
        ("Use Pops Count File", "use_pops_count_file"),
        ("Number of Options Threshold", "number_of_options_threshold"),
        ("Max Number of haplotypes in phase", "max_haplotypes_number_in_phase"),
    ]
    # The plan A line is only shown when a Plan_A_Matrix was configured.
    if config["nodes_for_plan_A"]:
        rows.append(("Nodes in plan A", "nodes_for_plan_A"))
    rows.append(("Save space mode", "save_mode"))

    print(banner)
    print("Performing imputation based on:")
    for label, key in rows:
        print("\t{}: {}".format(label, config[key]))
    print(banner)


def run_impute(conf_file="../conf/minimal-configuration.json", project_dir_graph="", project_dir_in_file=""):
    """Run imputation as described by a JSON configuration file.

    Loads the configuration, prints the effective settings, builds the
    graph from the node / top-links / edges files, makes sure the output
    directory exists and then calls ``Imputation.impute_file``.

    Args:
        conf_file: Path to the JSON configuration file.
        project_dir_graph: Prefix prepended to the graph file paths and to
            the pops count file.
        project_dir_in_file: Prefix prepended to the imputation input file
            paths.

    Raises:
        OSError: If the configuration file cannot be opened, or the output
            directory cannot be created (its parent must already exist).
        TypeError: If a file-name key that has no default (for example
            "node_csv_file") is missing from the configuration.
    """
    config, output_dir = _load_config(conf_file, project_dir_graph, project_dir_in_file)

    # Display the configurations we are using
    _print_config(config)

    # Perform imputation
    graph = Graph(config)
    graph.build_graph(config["node_file"], config["top_links_file"], config["edges_file"])
    imputation = Imputation(graph, config)

    # Create output directory if it doesn't exist
    pathlib.Path(output_dir).mkdir(parents=False, exist_ok=True)

    # Write out the results from imputation
    imputation.impute_file(config)
| 124 | + |
| 125 | + # Profiler end |
| 126 | + #pr.disable() |
| 127 | + #pr.print_stats(sort="time") |
0 commit comments