|
1 | 1 | #!/usr/bin/env python |
| 2 | +"""Adds risk score columns to a report file and computes risk scores for each entry. |
| 3 | +
|
| 4 | +This module processes a tab-separated report file, calculates risk score differences, |
| 5 | +and writes the results to a new file with updated columns. It supports both primary |
| 6 | +and alternative target scenarios based on user input. |
| 7 | +""" |
| 8 | + |
| 9 | +from typing import List, Tuple |
2 | 10 |
|
3 | 11 | import sys |
4 | 12 |
|
# names of the columns holding the computed risk scores
RISKCOLNAMES = ["Highest_CFD_Risk_Score", "Highest_CFD_Absolute_Risk_Score"]


def add_risk_score_columns(header: List[str], alternative: bool) -> List[str]:
    """Extend a report header with the risk score column names.

    The names listed in ``RISKCOLNAMES`` are appended to ``header``; when
    ``alternative`` is true a trailing ``CLUSTER_ID`` column is appended as well.
    The list is modified in place and the same object is returned.

    Args:
        header: Column names of the report; mutated by this call.
        alternative: Whether the ``CLUSTER_ID`` column must also be appended.

    Returns:
        List[str]: ``header`` itself, now including the additional columns.
    """
    # collect everything that has to be appended, then extend the header once
    extra_columns = list(RISKCOLNAMES)
    if alternative:
        extra_columns.append("CLUSTER_ID")
    header.extend(extra_columns)
    return header
| 34 | + |
| 35 | + |
| 36 | +def _compute_risk_score(line: str) -> Tuple[float, float]: |
| 37 | + """Calculates the risk score difference and its absolute value from a line of |
| 38 | + report data. |
| 39 | +
|
| 40 | + This function extracts two score values from the input line, computes their |
| 41 | + difference, and returns both the difference and its absolute value. |
| 42 | +
|
| 43 | + Args: |
| 44 | + line: A tab-separated string containing report data. |
| 45 | +
|
| 46 | + Returns: |
| 47 | + Tuple[float, float]: The risk score difference and its absolute value. |
| 48 | + """ |
| 49 | + fields = line.strip().split("\t") |
| 50 | + score, score_alt = float(fields[20]), float(fields[21]) |
| 51 | + score_diff = score - score_alt |
| 52 | + return score_diff, abs(score_diff) |
| 53 | + |
| 54 | + |
def compute_risk_score(
    report_fname: str, report_outfname: str, alternative: bool
) -> None:
    """Read a report, compute the risk scores of each row and write a new report.

    The first line of the input is treated as the header and is extended via
    ``add_risk_score_columns``. Every following line is passed to
    ``_compute_risk_score`` and written to the output followed by the score
    difference and its absolute value.

    Args:
        report_fname: Path to the input (tab-separated) report file.
        report_outfname: Path to the output report file; opened in write mode.
        alternative: Forwarded to ``add_risk_score_columns`` to decide whether
            the ``CLUSTER_ID`` column is added to the header.

    Returns:
        None

    Raises:
        OSError: If any exception is raised while reading, parsing or writing;
            the original exception is chained as the cause.
    """
    try:
        with open(report_fname, mode="r") as fin, open(
            report_outfname, mode="w"
        ) as fout:
            header = fin.readline().strip().split("\t")  # read file header
            header = add_risk_score_columns(header, alternative)
            fout.write("\t".join(header) + "\n")  # write header to output report
            # NOTE(review): with alternative=True the header gains CLUSTER_ID but
            # no per-row value is written here; presumably a later step fills
            # it in -- confirm.
            for line in fin:
                score_diff, score_diff_abs = _compute_risk_score(line)
                # Lines read from the file keep their trailing newline; strip it
                # (the same way the row is parsed) so the scores are appended to
                # the same record instead of ending up on a line of their own.
                row = line.strip()
                fout.write(f"{row}\t{score_diff}\t{score_diff_abs}\n")
    except Exception as e:
        # any failure (I/O or malformed row) is reported as OSError to callers
        raise OSError(
            f"An error occurred while computing risk scores for {report_fname}"
        ) from e
| 90 | + |
| 91 | + |
def risk_score() -> None:
    """Parse the command-line arguments and compute the report risk scores.

    Three positional arguments are read from ``sys.argv``: the input report
    path, the output report path and a flag that is considered true only when
    it is exactly the string ``"True"``. Any further argument is ignored.

    Returns:
        None

    Raises:
        ValueError: If fewer than three arguments are supplied.
    """
    cli_args = sys.argv[1:4]  # read input args
    if len(cli_args) != 3:
        # fail with an explicit usage message instead of an opaque unpack error
        raise ValueError(
            "expected 3 arguments: <report_fname> <report_outfname> "
            f"<alternative: True|False>, got {len(cli_args)}"
        )
    report_fname, report_outfname, alternative_flag = cli_args
    is_alternative = alternative_flag == "True"  # primary or alternative targets?
    # compute risk score on CFD/CRISTA
    compute_risk_score(report_fname, report_outfname, is_alternative)
| 105 | + |
5 | 106 |
|
if __name__ == "__main__":
    # Run the command-line entry point only when executed as a script, so that
    # importing this module does not parse sys.argv or touch any file.
    risk_score()
0 commit comments