diff --git a/grim/grim.py b/grim/grim.py
index 070628c..e930402 100644
--- a/grim/grim.py
+++ b/grim/grim.py
@@ -54,7 +54,7 @@ def graph_freqs(conf_file="", for_em=False, em_pop=None):
     )
 
 
-def impute(conf_file=""):
+def impute(conf_file="", hap_pop_pair=False, graph=None):
     project_dir_in_file, project_dir_graph = "", ""
 
     if conf_file == "":
@@ -70,7 +70,8 @@ def impute(conf_file=""):
         project_dir_in_file = (
             os.path.dirname(os.path.realpath(__file__)).replace("/grim", "") + "/"
         )
-    run_impute(conf_file, project_dir_graph, project_dir_in_file)
+    graph = run_impute(conf_file, project_dir_graph, project_dir_in_file, hap_pop_pair, graph)
+    return graph  # the graph run_impute used, so a later call can pass it back in
 
 
 def impute_instance(config, graph, count_by_prob=None):
diff --git a/grim/run_impute_def.py b/grim/run_impute_def.py
index ff1bd68..305862d 100644
--- a/grim/run_impute_def.py
+++ b/grim/run_impute_def.py
@@ -4,6 +4,7 @@ import pathlib
 import sys
 import os
 
+from pathlib import Path
 
 sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)))
 
@@ -15,10 +16,34 @@
 # pr.enable()
 
 
+def full_path(output, original_path):
+    """
+    Insert the output directory between a path's parent and its file name.
+
+    Parameters:
+    output (str): Directory name to place directly above the file name.
+    original_path (str): The original file path.
+
+    Returns:
+    str: ``<parent of original_path>/<output>/<file name>``; if ``output``
+    is absolute, pathlib discards the parent and the result starts at it.
+    """
+    # Convert to Path object
+    path = Path(original_path)
+
+    # Construct the new path
+    new_path = path.parent / output / path.name
+
+    # Return the new path as a string
+    return str(new_path)
+
+
 def run_impute(
     conf_file="../conf/minimal-configuration.json",
     project_dir_graph="",
     project_dir_in_file="",
+    hap_pop_pair=False,  # forwarded to Imputation.impute_file as em_mr
+    graph=None,  # Graph to reuse; a new one is built from config when None
 ):
     configuration_file = conf_file
 
@@ -56,18 +81,12 @@ def run_impute(
         + json_conf.get("edges_csv_file"),
         "imputation_input_file": project_dir_in_file
         + json_conf.get("imputation_in_file"),
-        "imputation_out_umug_freq_file": output_dir
-        + json_conf.get("imputation_out_umug_freq_filename"),
-        "imputation_out_umug_pops_file": output_dir
-        + json_conf.get("imputation_out_umug_pops_filename"),
-        "imputation_out_hap_freq_file": output_dir
-        + json_conf.get("imputation_out_hap_freq_filename"),
-        "imputation_out_hap_pops_file": output_dir
-        + json_conf.get("imputation_out_hap_pops_filename"),
-        "imputation_out_miss_file": output_dir
-        + json_conf.get("imputation_out_miss_filename"),
-        "imputation_out_problem_file": output_dir
-        + json_conf.get("imputation_out_problem_filename"),
+        "imputation_out_umug_freq_file": full_path(output_dir, json_conf.get("imputation_out_umug_freq_filename")),
+        "imputation_out_umug_pops_file": full_path(output_dir, json_conf.get("imputation_out_umug_pops_filename")),
+        "imputation_out_hap_freq_file": full_path(output_dir, json_conf.get("imputation_out_hap_freq_filename")),
+        "imputation_out_hap_pops_file": full_path(output_dir, json_conf.get("imputation_out_hap_pops_filename")),
+        "imputation_out_miss_file": full_path(output_dir, json_conf.get("imputation_out_miss_filename")),
+        "imputation_out_problem_file": full_path(output_dir, json_conf.get("imputation_out_problem_filename")),
         "factor_missing_data": json_conf.get("factor_missing_data", 0.01),
         "loci_map": json_conf.get(
             "loci_map", {"A": 1, "B": 3, "C": 2, "DQB1": 4, "DRB1": 5}
@@ -161,18 +180,21 @@ def run_impute(
     config["full_loci"] = "".join(sorted(all_loci_set))
 
     # Perform imputation
-    graph = Graph(config)
-    graph.build_graph(
-        config["node_file"], config["top_links_file"], config["edges_file"]
-    )
+    if graph is None:  # build only when the caller did not supply a graph
+        graph = Graph(config)
+        graph.build_graph(
+            config["node_file"], config["top_links_file"], config["edges_file"]
+        )
     imputation = Imputation(graph, config)
 
     # Create output directory if it doesn't exist
     pathlib.Path(output_dir).mkdir(parents=False, exist_ok=True)
 
     # Write out the results from imputation
-    imputation.impute_file(config)
+    imputation.impute_file(config, em_mr=hap_pop_pair)
 
     # Profiler end
     # pr.disable()
     # pr.print_stats(sort="time")
+
+    return graph