Skip to content

Commit 8841dee

Browse files
committed
Tidying up comments
1 parent af36eb2 commit 8841dee

File tree

5 files changed

+26
-36
lines changed

5 files changed

+26
-36
lines changed

BuildSuperTranscript.py

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
#A quick little first pass script to read a BLAT output and then:
1+
# Author: Anthony Hawkins
2+
3+
#A script to construct a SuperTranscript given a .fasta file of transcript sequence
24
# 1) Determine block sequences
35
# 2) Construct graph structure that stores each block with eges detailing how blocks are connect within transcripts
46
# 3) Sort blocks from the graph into topological order
@@ -16,14 +18,12 @@
1618
sys.setrecursionlimit(10000)
1719

1820

19-
#Define a function to be used recursively to check for each succesor node whether it only has one in or out
20-
21+
#Define a function to be used recursively to check for each succesor node whether it only has one in or out
2122
def successor_check(graph,n,tmerge):
2223
ess = [node for node in graph.successors(n)] #Get list of succesors
2324

2425
#Check for all successors
2526
for s in ess:
26-
#if(len(graph.out_edges([s])) == 1 and len(graph.in_edges([s])) == 1): #I.e. if only one outgoing edge and one incoming edge it belongs to same block
2727
if(len(graph.in_edges([s])) <= 1 and len(ess) <= 1): #Succesor node only has one incoming path and is the only option for the previous node
2828
tmerge.append(s)
2929
successor_check(graph,s,tmerge)
@@ -143,7 +143,7 @@ def filt_dir(table):
143143

144144

145145

146-
146+
#Main function to produce a SuperTranscript
147147
def SuperTran(fname,verbose=False):
148148

149149
#Start Clock for timing
@@ -173,7 +173,7 @@ def SuperTran(fname,verbose=False):
173173
transcript_status = len(transcripts)
174174
whirl_status = 0
175175

176-
#If there is only one transcript in this file, then simply that transcript is the super transcript
176+
#If there is only one transcript in this file, then simply that transcript is the super transcript...
177177
if(len(transcripts) == 1):
178178
if(verbose): print("One\n")
179179
seq = next(iter(transcripts.values())) #Python 3 specific codee...
@@ -184,7 +184,7 @@ def SuperTran(fname,verbose=False):
184184
try:
185185
seq, anno, whirl_status = BuildGraph(fname,transcripts,verbose)
186186

187-
except: #Graph building failed, just take longest transcript or (concatenate all transcripts)
187+
except: #Graph building failed, just take longest transcript (or concatenate all transcripts)
188188
temp = 0
189189
seq = ''
190190
print('FAILED to construct')
@@ -261,14 +261,6 @@ def BuildGraph(fname,transcripts,verbose=False):
261261

262262

263263
for i in range(0,len(bData)):
264-
265-
#Check explicitly that there are no gaps - OBS these "gaps" are actually gaps between blat blocks and not actually gaps within blat blocks as i initially thought
266-
#if( bData.iloc[i,4] > 0 or bData.iloc[i,6] > 0):
267-
# continue
268-
269-
#Don't allign the transcripts against each other twice...
270-
#I.e. BLAT does T1 vs T2 but also T2 vs T1 (which should be the same give or take)
271-
272264

273265
#Extract the info
274266
seq=list(transcripts[bData.iloc[i,9]]) #Get sequence from query name
@@ -283,10 +275,6 @@ def BuildGraph(fname,transcripts,verbose=False):
283275
qStart.append(int(qStarts[j]))
284276
qName.append(bData.iloc[i,9])
285277

286-
#OBS
287-
#For now assume that there is no contradiction in the directionality (i.e. all blocks in psl are consistent with each contig being in the defined direction relative to each pair)
288-
#if(trandir[bData.iloc[i,9]] == '-' and trandir[bData.iloc[i,13]] == '-'
289-
290278
if(verbose): print("Constructing and merging nodes in graphs based on Blocks and Transcripts")
291279

292280

@@ -455,13 +443,6 @@ def BuildGraph(fname,transcripts,verbose=False):
455443
M_node = None
456444
Multi = 100000000
457445

458-
#Find Highest multiplicity node in loop to use for breaking point of cycle
459-
#for node in whirl:
460-
# temp = len(C.out_edges([node])) + len(C.out_edges([node]))
461-
# if(temp >= Multi):
462-
# Multi = temp
463-
# M_node = node
464-
465446
#Find the node with smallest sequence and break there (instead of the highest multiplicty)
466447
for node in whirl:
467448
temp = len(C.node[node]['Base'])

Checker.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
#Author: Anthony Hawkins
2+
13
#A script to systematically check for each gene whether the SuperTranscript builder worked ok
4+
# And to produce the alternate annotation for the SuperTranscript
5+
26
import multiprocessing, logging
37
from multiprocessing import Pool
48
from multiprocessing import Process

Lace.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
#Author: Anthony Hawkins
2+
13
#Lacing together different transcripts into a SuperTranscript
4+
#This is the main script which parrallelises making a SuperTranscript for each gene/cluster
5+
#The main inputs are .fasta file containing all transcripts in all the genes/cluster you wish to constuct
6+
#and a tab delimited text file of two columns with the mapping of transcripts <-> gene
27

38

49
import multiprocessing, logging
@@ -179,10 +184,12 @@ def Split(genome,corsetfile,ncore,maxTran,outdir):
179184

180185
if(args.alternate):
181186
cwd = os.getcwd()
187+
182188
#Change to output directory
183189
os.chdir(args.outputDir)
184190
print("Making Alternate Annotation and checks")
185191
Checker('SuperDuper.fasta',args.cores)
192+
186193
#Change back
187194
os.chdir(cwd)
188195
print('Done')

Mobius.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1-
#A little script to construct an annotation from an SJ.out.tab file
1+
#Author: Anthony Hawkins
2+
#A little script to construct an annotation from an SJ.out.tab file, a standard STAR output.
3+
# This creates the dynamic block annotation
4+
25
import argparse
36
import pandas as pd
47
import os, sys
58

69
def Mobius(sjfile,gfile,ft,flat_ann):
7-
#First read in the SJ.out.tab as sys.argv[1]
10+
11+
#First read in the SJ.out.tab as sys.argv[1]
812
sj = pd.read_csv(sjfile,sep='\t',header=None,names=['Gene','Start','End','strand','intron motif','Annotated','Unique','Multi','Overhang'])
913

1014
#Read in SuperTranscript fasta file
@@ -28,7 +32,7 @@ def Mobius(sjfile,gfile,ft,flat_ann):
2832
for i in range(0,len(sj['Gene'])):
2933
curr_gene = sj.iloc[i,0]
3034
if(curr_gene not in slist.keys()): slist[curr_gene] = [1]
31-
if((sj.iloc[i,7] + sj.iloc[i,8]) > 5): #More than 10 reads (either unique or multi spanninf junction)
35+
if((sj.iloc[i,7] + sj.iloc[i,8]) > 5): #More than 5 reads (either unique or multi spanning junction)
3236
slist[curr_gene].append(int(sj.iloc[i,1])) #This is actually the intron start part (i.e one base over)
3337
slist[curr_gene].append(int(sj.iloc[i,2])+1) #This is the end of the exon-exon junction, so need to add one for the actual exon start place
3438

STViewer.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#Author: Anthony Hawkins
12
#Visualise a given gene in your super transcript
23

34
import pandas as pd
@@ -59,22 +60,16 @@ def Visualise(gene_name):
5960
gs = gridspec.GridSpec(2,1,height_ratios=[4,1])
6061
ax1=plt.subplot(gs[0])
6162
accum = 0
62-
#for row in range(0,len(gff_data)):
63-
# size = 1 + int(gff_data.iloc[row,4]) - int(gff_data.iloc[row,3]) #1+ for converting co-ordinate systems
64-
# plt.barh(len(transcripts),size,color='#ffc024',left=accum,alpha=0.8)
65-
# accum=accum+size
6663

6764
plt.barh(len(transcripts),ST_length,color='#ffc024',left=0)
6865
plot_dict = {}
6966
col_dict = {}
7067
labs = []
7168

72-
#col2=iter(cm.plasma(np.linspace(0,1,len(transcripts))))
7369
col2=iter(cm.terrain(np.linspace(0,1,len(transcripts))))
7470
for i,key in enumerate(transcripts):
7571
plot_dict[key] = i
7672
col_dict[key] = next(col2)
77-
#lab = "T"+str(i)
7873
lab =""
7974
labs.append(lab)
8075

@@ -103,7 +98,6 @@ def Visualise(gene_name):
10398
width=0.8
10499
labs.append('Super')
105100
plt.yticks(ind + width/2.,labs,fontsize="medium",fontweight="semibold")
106-
#plt.title('Breakdown of Super Transcript')
107101
plt.ylabel('Transcripts',fontdict=font)
108102

109103

0 commit comments

Comments
 (0)