Skip to content

Commit 0dd535c

Browse files
committed
add isoform level clustering benchmarking code and source data
1 parent 40b4da9 commit 0dd535c

File tree

4 files changed

+24167
-0
lines changed

4 files changed

+24167
-0
lines changed

misc/cluster_benchmark.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import csv
2+
from sklearn import metrics
3+
import subprocess as sub
4+
import argparse
5+
6+
# Benchmark driver: runs RATTLE isoform-level clustering on a reads file,
# summarises the clusters, then scores the predicted clusters against the
# transcript and gene labels with homogeneity / completeness.

# Reference whose header lines map each transcript id to its gene id.
REFERENCE_FASTA = './toyset/cluster_benchmark/input/sample_ref.fa'


def parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings, or None to read sys.argv[1:].
    Returns the argparse namespace with `input`, `output`, `threads`, `rna`.
    """
    parser = argparse.ArgumentParser(description='RATTLE running clustering and cluster summary step')
    parser.add_argument('input', type=str, help='input reads file(required)')
    parser.add_argument('output', type=str, help='output folder (required)')
    parser.add_argument('threads', type=int, help='threads number to run RATTLE (required)')
    parser.add_argument('--rna', action='store_true', help='whether to use RNA mode (instead of cDNA)')
    return parser.parse_args(argv)


def cluster_command(reads, out_dir, threads, rna):
    """Return the argv list for `./rattle cluster` in isoform mode.

    An argument list (rather than a shell string) keeps paths containing
    spaces or shell metacharacters intact and avoids shell interpretation.
    """
    cmd = ["./rattle", "cluster", "-i", reads, "-t", str(threads), "-o", out_dir]
    if rna:
        cmd.append("--rna")
    cmd.append("--iso")
    return cmd


def summary_command(reads, out_dir):
    """Return the argv list for `./rattle cluster_summary`.

    The summary itself is written to stdout by RATTLE; the caller redirects it.
    """
    return ["./rattle", "cluster_summary", "-i", reads, "-c", out_dir + "/clusters.out"]


def transcript_to_gene(fasta_lines):
    """Build a {transcript_id: gene_id} dict from FASTA header lines.

    fasta_lines: iterable of text lines from the reference FASTA.

    Header lines are recognised by their leading '>' so that references with
    wrapped (multi-line) sequences are handled as well as two-line records.
    The transcript id is the first whitespace-separated field without '>';
    the gene id is the fourth field with its first five characters removed
    (presumably a 'gene:' prefix as in Ensembl cDNA headers -- confirm
    against the reference file).
    """
    mapping = {}
    for line in fasta_lines:
        if not line.startswith('>'):
            continue
        fields = line.split()
        mapping[fields[0][1:]] = fields[3][5:]
    return mapping


def collect_labels(rows, gene_of):
    """Split summary rows into the three label lists used for scoring.

    rows: iterable of parsed summary rows; column 1 is used as the true
          transcript label and column 2 as the predicted cluster label.
    gene_of: {transcript_id: gene_id} mapping.

    Returns (true_transcripts, predicted, true_genes).
    Raises KeyError if a transcript in the summary is missing from gene_of.
    """
    true_transcripts = []
    predicted = []
    true_genes = []
    for row in rows:
        true_transcripts.append(row[1])
        predicted.append(row[2])
        true_genes.append(gene_of[row[1]])
    return true_transcripts, predicted, true_genes


def main(argv=None):
    """Run clustering, summarise, and print the four benchmark scores.

    Raises subprocess.CalledProcessError if either RATTLE step exits with a
    non-zero status, instead of reporting success and scoring stale output.
    """
    args = parse_args(argv)

    sub.check_call(cluster_command(args.input, args.output, args.threads, args.rna))
    print("RATTLE isoform-level clustering completed")

    summary_path = args.output + "/summary.tsv"
    # Redirect RATTLE's stdout into the summary file without going via a shell.
    with open(summary_path, 'w') as summary_out:
        sub.check_call(summary_command(args.input, args.output), stdout=summary_out)
    print("RATTLE cluster summary completed")

    with open(REFERENCE_FASTA, 'r') as ref:
        gene_of = transcript_to_gene(ref)

    # The context manager closes the summary file once all rows are consumed.
    with open(summary_path) as summary_in:
        labels_true_t, labels_pred, labels_true_g = collect_labels(csv.reader(summary_in), gene_of)

    print("homogeneity score with (sample reads and human transcriptome) is: {:.2f}%".format(metrics.homogeneity_score(labels_true_t, labels_pred) * 100))
    print("completeness score with (sample reads and human transcriptome) is: {:.2f}%".format(metrics.completeness_score(labels_true_t, labels_pred) * 100))
    print("homogeneity score with (sample reads and human gene) is: {:.2f}%".format(metrics.homogeneity_score(labels_true_g, labels_pred) * 100))
    print("completeness score with (sample reads and human gene) is: {:.2f}%".format(metrics.completeness_score(labels_true_g, labels_pred) * 100))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)