@@ -37,56 +37,57 @@ def GeneRankCombination(treatment):
3737 os .chdir (GeneDir )
3838 treatment_files = [f for f in os .listdir (GeneDir ) if treatment in f \
3939 and metric in f and 'combined' not in f ]
40- treatment_files .sort ()
41- K = len (treatment_files )
42- ResultTable = pandas .DataFrame ()
43- X1 = pandas .read_table (treatment_files [0 ], sep = '\t ' )
44- # Pre-process gene rank tables in case of STARS
45- if metric == 'STARS' :
46- # Compute consensus gene list (present in all replicates)
47- print ('Computing consensus gene list from STARS output ...' )
48- Genes_0 = set (X1 ['gene' ])
49- for treatment_file in treatment_files :
50- X = pandas .read_table (treatment_file , sep = '\t ' )
51- Genes = set (X ['gene' ])
52- Genes_0 = Genes_0 .intersection (Genes )
53- G = len (Genes_0 )
54- else :
55- G = len (X1 )
56- # Read replicates
57- chi = list (numpy .zeros (G ))
58- k = 0
59- for treatment_file in treatment_files :
60- k += 1
61- print ('Reading ' + treatment + ' replicate ' + str (k )+ ' ...' )
62- X = pandas .read_table (treatment_file , sep = '\t ' )
40+ if len (treatment_files ) > 1 :
41+ treatment_files .sort ()
42+ K = len (treatment_files )
43+ ResultTable = pandas .DataFrame ()
44+ X1 = pandas .read_table (treatment_files [0 ], sep = '\t ' )
45+ # Pre-process gene rank tables in case of STARS
6346 if metric == 'STARS' :
64- # use only genes from consensus list
65- I = [X [X ['gene' ]== gene ].index [0 ] for gene in Genes_0 ]
66- X0 = X .iloc [I ]
67- X0 .sort_values ('gene' ,ascending = 1 )
47+ # Compute consensus gene list (present in all replicates)
48+ print ('Computing consensus gene list from STARS output ...' )
49+ Genes_0 = set (X1 ['gene' ])
50+ for treatment_file in treatment_files :
51+ X = pandas .read_table (treatment_file , sep = '\t ' )
52+ Genes = set (X ['gene' ])
53+ Genes_0 = Genes_0 .intersection (Genes )
54+ G = len (Genes_0 )
6855 else :
69- X0 = X .sort_values ('gene' ,ascending = 1 )
70- genes = list (X0 ['gene' ])
71- ResultTable ['gene' ] = genes
72- pval = list (X0 ['p_value (adj.)' ])
73- ResultTable ['p-value Repl. ' + str (k )] = pval
74- ln_pval = [numpy .log (pval [i ]+ eps ) for i in range (G )]
75- chi = numpy .add (chi ,ln_pval )
76-
77- # Combine p-values
78- print ('Computing Fisher statistic ...' )
79- chi = [- 2 * chi [i ] for i in range (G )]
80- ResultTable ['Fisher Statistic' ] = chi
81- PVal = [1 - scipy .stats .chi2 .cdf (chi [i ],2 * K ) for i in range (G )]
82- ResultTable ['p-value combined' ] = PVal
83- significant = [PVal [i ] < alpha for i in range (G )]
84- ResultTable ['significant' ] = significant
85- ResultTable = ResultTable .sort_values (['significant' ,'p-value combined' ],ascending = [0 ,1 ])
86- print ('Writing results dataframe ...' )
87- ResultFilename = treatment + '_combined_' + str (alpha )+ '_' + str (padj )+ '_' + str (metric )\
88- + '_P' + str (Np )+ '_GeneList.txt'
89- ResultTable .to_csv (ResultFilename , sep = '\t ' , index = False )
56+ G = len (X1 )
57+ # Read replicates
58+ chi = list (numpy .zeros (G ))
59+ k = 0
60+ for treatment_file in treatment_files :
61+ k += 1
62+ print ('Reading ' + treatment + ' replicate ' + str (k )+ ' ...' )
63+ X = pandas .read_table (treatment_file , sep = '\t ' )
64+ if metric == 'STARS' :
65+ # use only genes from consensus list
66+ I = [X [X ['gene' ]== gene ].index [0 ] for gene in Genes_0 ]
67+ X0 = X .iloc [I ]
68+ X0 .sort_values ('gene' ,ascending = 1 )
69+ else :
70+ X0 = X .sort_values ('gene' ,ascending = 1 )
71+ genes = list (X0 ['gene' ])
72+ ResultTable ['gene' ] = genes
73+ pval = list (X0 ['p_value (adj.)' ])
74+ ResultTable ['p-value Repl. ' + str (k )] = pval
75+ ln_pval = [numpy .log (pval [i ]+ eps ) for i in range (G )]
76+ chi = numpy .add (chi ,ln_pval )
77+
78+ # Combine p-values
79+ print ('Computing Fisher statistic ...' )
80+ chi = [- 2 * chi [i ] for i in range (G )]
81+ ResultTable ['Fisher Statistic' ] = chi
82+ PVal = [1 - scipy .stats .chi2 .cdf (chi [i ],2 * K ) for i in range (G )]
83+ ResultTable ['p-value combined' ] = PVal
84+ significant = [PVal [i ] < alpha for i in range (G )]
85+ ResultTable ['significant' ] = significant
86+ ResultTable = ResultTable .sort_values (['significant' ,'p-value combined' ],ascending = [0 ,1 ])
87+ print ('Writing results dataframe ...' )
88+ ResultFilename = treatment + '_combined_' + str (alpha )+ '_' + str (padj )+ '_' + str (metric )\
89+ + '_P' + str (Np )+ '_GeneList.txt'
90+ ResultTable .to_csv (ResultFilename , sep = '\t ' , index = False )
9091
9192 # Time stamp
9293 end = time .time ()
0 commit comments