@@ -2,30 +2,55 @@ process OLGA {
22 tag " ${ sample_meta[0]} "
33 label ' process_low'
44 container " ghcr.io/break-through-cancer/bulktcr:latest"
5-
5+
66 input:
77 tuple val(sample_meta), path(count_table)
8-
8+
99 output:
10- path " ${ count_table.baseName} _tcr_generation_probabilities.tsv" , emit: " olga_output"
11-
10+ path " ${ count_table.baseName} _tcr_pgen.tsv" , emit: " olga_output"
11+ path " ${ count_table.baseName} _tcr_pgen_histogram.png"
12+
1213 script:
1314 """
1415 # Extract vector of cdr3 aa, dropping null values
15-
1616 cat > dropAA.py <<EOF
17-
1817 import pandas as pd
19-
18+
2019 df = pd.read_csv("${ count_table} ", sep="\t ")
2120 df = df.dropna(subset=["aminoAcid"])
2221 df = df["aminoAcid"]
2322 df.to_csv("output.tsv", sep="\t ", index=False, header=False)
24-
2523 EOF
26-
24+
2725 python dropAA.py
28-
29- olga-compute_pgen --humanTRB -i output.tsv -o "${ count_table.baseName} _tcr_generation_probabilities.tsv"
26+
27+ olga-compute_pgen --humanTRB -i output.tsv -o "${ count_table.baseName} _tcr_pgen.tsv"
28+
29+ python - <<EOF
30+ import pandas as pd
31+ import numpy as np
32+ import matplotlib.pyplot as plt
33+
34+ # Load TSV with no header
35+ df = pd.read_csv('${ count_table.baseName} _tcr_pgen.tsv', sep='\t ', header=None, usecols=[0, 1], names=['CDR3b', 'probability'])
36+
37+ # Drop rows where pgen is 0
38+ df = df[df['probability'] != 0]
39+ log_probs = np.log10(df['probability'])
40+
41+ # Plot histogram
42+ plt.figure(figsize=(8, 5))
43+ plt.hist(log_probs, bins=30, density=True, edgecolor='black')
44+
45+ # Label with LaTeX formatting
46+ plt.xlabel('log_10 Generation Probability')
47+ plt.ylabel('Probability Density')
48+ plt.title(f'${ count_table.baseName} TCR Generation Probability Histogram')
49+ # plt.grid(True)
50+
51+ # Save to file
52+ plt.savefig("${ count_table.baseName} _tcr_pgen_histogram.png", dpi=300, bbox_inches="tight")
53+ plt.close()
54+ EOF
3055 """
31- }
56+ }
0 commit comments