Skip to content

Commit 0bc9cc0

Browse files
committed
Add sample TCR convergence
1 parent dc978fc commit 0bc9cc0

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

modules/local/convergence.nf

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
process CONVERGENCE {
2+
tag "${sample_meta[0]}"
3+
label 'process_low'
4+
container "ghcr.io/break-through-cancer/bulktcr:latest"
5+
6+
input:
7+
tuple val(sample_meta), path(count_table)
8+
9+
output:
10+
path "${count_table.baseName}_tcr_convergence.tsv", emit: "convergence_output"
11+
path "${count_table.baseName}_tcr_convergence_histogram.png"
12+
13+
script:
14+
"""
15+
# Extract vector of cdr3 aa, dropping null values
16+
python - <<EOF
17+
import pandas as pd
18+
import numpy as np
19+
import matplotlib.pyplot as plt
20+
21+
# Load your TCR data (make sure the file has 'cdr3_aa' and 'cdr3_nt' columns)
22+
df = pd.read_csv("${count_table}", sep="\t", usecols=["aminoAcid", "nucleotide"])
23+
df = df.dropna(subset=["aminoAcid"])
24+
25+
# Group by amino acid sequence and count unique nucleotide sequences (convergence)
26+
convergence_df = (
27+
df.groupby("aminoAcid")["nucleotide"]
28+
.nunique()
29+
.reset_index(name="convergence")
30+
)
31+
32+
# Sort by convergence count, descending
33+
convergence_df = convergence_df.sort_values(by="convergence", ascending=False)
34+
35+
# Export
36+
convergence_df.to_csv("${count_table.baseName}_tcr_convergence.tsv", sep="\t", index=False)
37+
38+
# Plot histogram
39+
convergence = convergence_df['convergence'].values
40+
average_convergence = convergence_df["convergence"].mean()
41+
42+
# Create integer bin edges from 0 to max(data)
43+
bins = np.arange(min(convergence), max(convergence) + 2) # +2 to include the last value as a bin edge
44+
45+
plt.figure(figsize=(8, 5))
46+
plt.hist(convergence, bins=bins, edgecolor='black', align='left')
47+
plt.xticks(bins[:-1]) # whole number positions and labels
48+
plt.yscale('log')
49+
50+
plt.xlabel('TCR Convergence Number')
51+
plt.ylabel('TCR Convergence Frequency (log scale)')
52+
plt.title(f'${count_table.baseName} TCR Convergence Histogram, Average: {average_convergence:.2f}')
53+
54+
# Save to file
55+
plt.savefig("${count_table.baseName}_tcr_convergence_histogram.png", dpi=300, bbox_inches="tight")
56+
plt.close()
57+
58+
EOF
59+
"""
60+
}

subworkflows/local/sample.nf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ include { SAMPLE_PLOT } from '../../modules/local/sample_plot'
1010
include { TCRDIST3_MATRIX } from '../../modules/local/tcrdist3_matrix'
1111
include { TCRDIST3_PLOT } from '../../modules/local/tcrdist3_plot'
1212
include { OLGA } from '../../modules/local/olga'
13+
include { CONVERGENCE } from '../../modules/local/convergence'
1314

1415
/*
1516
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -69,6 +70,8 @@ workflow SAMPLE {
6970

7071
OLGA ( sample_map )
7172

73+
CONVERGENCE ( sample_map )
74+
7275
// emit:
7376
// sample_stats_csv
7477
// v_family_csv

0 commit comments

Comments
 (0)