Skip to content

Commit eff24d0

Browse files
authored
Merge pull request #178 from BackofenLab/dev
v3.1.4
2 parents 3a31d0b + 86dd7be commit eff24d0

26 files changed

+757
-292
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ script:
2626
- cd $TRAVIS_BUILD_DIR
2727
# generate autotools's files
2828
- bash autotools-init.sh
29-
# run configure
29+
# run configure (without boost checks)
3030
- ./configure --prefix=$HOME/IntaRNA --with-vrna=$HOME/miniconda/envs/build-IntaRNA --with-boost=no --without-zlib
3131
# compile documentation
3232
# - make doxygen-doc
3333
# compile, test and install IntaRNA
3434
- make -j 2 && make tests -j 2 && make install
3535
##### check IntaRNA build #####
36-
# run IntaRNA with help output
36+
# run installed IntaRNA with help output
3737
- $HOME/IntaRNA/bin/IntaRNA -h

ChangeLog

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,59 @@
1010
# changes in development version since last release
1111
################################################################################
1212

13+
1314
################################################################################
1415
################################################################################
1516

17+
18+
################################################################################
19+
### version 3.1.4
20+
################################################################################
21+
22+
# IntaRNA
23+
- bugfix generation and tracing of seeds with bulges and no GU ends
24+
- bugfix seed-extension prediction for seeds with bulges
25+
- noLP for seeds with bulges enabled
26+
27+
# R
28+
- `IntaRNA_CSV_p-value.R` script to estimate p-values based on energy values
29+
- `IntaRNA_plotRegions.R` = renaming of former `plotRegions.R`
30+
31+
################################################################################
32+
33+
200130 Martin Raden
34+
* IntaRNA/SeedHandlerMfe :
35+
* bugfix generation and tracing of seeds with bulges and no GU ends
36+
* IntaRNA/PredictorMfe*SeedExtension* :
37+
* bugfix enumeration of seeds with bulges
38+
* bin/CommandLineParsing :
39+
* error msgs rephrased
40+
+ noLP for seeds with bulges enabled
41+
+ setup noLP for seed constraints via outNoLP
42+
* IntaRNA/SeedConstraint :
43+
+ isLpAllowed : whether or not lps are allowed in seeds
44+
* IntaRNA/SeedHandlerMfe :
45+
+ support for noLP constraint
46+
* test/SeedHandlerMfe :
47+
+ test with lp
48+
+ test no lp (boundary)
49+
+ test no lp (internal)
50+
* test/*
51+
* adaptation to SeedConstraint constructor changes
52+
53+
200121 Martin Raden
54+
+ R/IntaRNA_CSV_p-value.R : former addPvalues2csv.R
55+
+ R/IntaRNA_plotRegions.R : former plotRegions.R
56+
- R/addPvalues2csv.R : renamed
57+
- R/plotRegions.R : renamed
58+
* README.md : adapted to renamings
59+
+ R/Makefile.am : install R scripts
60+
61+
191115 Martin Raden
62+
+ R/addPvalues2csv.R
63+
* R/README.md :
64+
+ docu of addPvalues2csv.R
65+
1666
################################################################################
1767
### version 3.1.3
1868
################################################################################

Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
export GCC_COLORS ?= "error=01;31:warning=01;35:note=01;36:caret=01;32:locus=01:quote=01"
66

77
# sub directories to check for Makefiles
8-
SUBDIRS = src python perl tests doc .
8+
SUBDIRS = src python perl R tests doc .
99

1010
# list of all personalities to be installed
1111
#PERSONALITIES = `grep -P "^\\s+case\\s+IntaRNA\\S+\\s*:\\s*return" $(abs_top_srcdir)/src/bin/CommandLineParsing.h | sed "s/^\\s*case\\s\\+\\(IntaRNA\\S*\\)\\s\\+:\\s\\+return.*/\\1/g"`

R/IntaRNA_CSV_p-value.R

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env Rscript
2+
3+
####################################################################
4+
# Computes p-values and false discovery rates (fdr following Benjamini+Hochberg)
5+
# by fitting a GEV on the energy values computed by IntaRNA.
6+
# Note, such p-value estimates are only useful for genome-wide predictions.
7+
#
8+
# arguments: <IntaRNA-output-CSV> [<out-CSV> = <intarna-csv-output>] [<col-name-E> = E]
9+
#
10+
# 1 <IntaRNA-output-CSV> = ";"-separated CSV output of IntaRNA
11+
# 2 <out-CSV> = file name to write the extended CSV output to (2 new columns)
12+
# 3 <col-name-E> = the column name that holds the energy values to be fitted
13+
#
14+
# example call:
15+
#
16+
# Rscript --vanilla IntaRNA_CSV_p-value.R predictions.csv
17+
#
18+
# This script is part of the IntaRNA source code package. See
19+
# respective licence and documentation for further information.
20+
#
21+
# https://github.com/BackofenLab/IntaRNA
22+
#
23+
####################################################################
24+
25+
26+
####################################################################
27+
# get command line arguments
28+
####################################################################
29+
30+
args <- commandArgs(trailingOnly = TRUE)

# at least the input file has to be given
if (length(args) < 1) {
  stop("call with <intarna-csv-output> [<out-file-with-p-values> = <intarna-csv-output>] [<col-name-E> = E]", call. = FALSE)
}

# argument 1 : input file = IntaRNA csv output (has to exist)
intarnaOutputFile <- args[1]
if (!file.exists(intarnaOutputFile)) {
  stop("intarna-csv-output file '", intarnaOutputFile, "' does not exist!", call. = FALSE)
}

# argument 2 (optional) : output file; if omitted, the input file is overwritten
outFile <- if (length(args) >= 2) args[2] else intarnaOutputFile

# argument 3 (optional) : name of the column to get energies from; "E" if omitted
colNameE <- if (length(args) >= 3) args[3] else "E"

# column delimiter used in CSV input / output
csvColSep <- ";"

# number of digits of p-values
pValPrec <- 7
56+
57+
####################################################################
# Fits a generalized extreme value (GEV) distribution to the negated
# energy values by minimizing the negative log-likelihood with 'optim'
# (default Nelder-Mead method).
# adapted from 'gev' function of 'evir' library
# @param energy the IntaRNA energy values to fit (a numeric vector);
#        non-finite entries (NA, NaN, +-Inf) are ignored for the fit
# @return list with the fitting parameters xi, sigma, and mu
# Stops with an error if less than two distinct finite energy values
# are available, since no scale can be estimated then.
####################################################################
gevFitting <- function (energy)
{
  # the fit is done on negated energies (low energies = extreme values)
  energy <- as.numeric(-energy)
  # missing / non-finite values would turn var() and the likelihood into NA
  energy <- energy[is.finite(energy)]
  if (length(unique(energy)) < 2) {
    stop("GEV fitting requires at least two distinct finite energy values", call. = FALSE)
  }

  # start values: moment estimates of a Gumbel distribution
  # (0.57722 = Euler-Mascheroni constant) and a small positive shape
  sigma0 <- sqrt(6 * var(energy)) / pi
  mu0 <- mean(energy) - 0.57722 * sigma0
  xi0 <- 0.1
  theta <- c(xi0, sigma0, mu0)

  # negative log-likelihood of the GEV for theta = c(xi, sigma, mu)
  negloglik <- function(theta, tmp) {
    # a non-positive scale is invalid (and would cause a division by zero)
    if (theta[2] <= 0) {
      return(1e+06)
    }
    y <- 1 + (theta[1] * (tmp - theta[3])) / theta[2]
    # data outside the support of the distribution: penalize
    if (min(y) < 0) {
      return(1e+06)
    }
    term1 <- length(tmp) * logb(theta[2])
    term2 <- sum((1 + 1 / theta[1]) * logb(y))
    term3 <- sum(y^(-1 / theta[1]))
    term1 + term2 + term3
  }

  # compute fit
  fit <- optim(theta, negloglik, tmp = energy)
  # optim reports a non-zero convergence code if it did not converge
  if (fit$convergence) {
    warning("gev fit optimization may not have succeeded")
  }

  return( list( xi=fit$par[1], sigma=fit$par[2], mu=fit$par[3] ) )
}
91+
92+
93+
####################################################################
# Computes p-values for the given energy values and GEV distribution,
# i.e. 1 - F(-energy) where F is the GEV distribution function.
# adapted from 'pgev' function of 'evir' library
# @param energy IntaRNA energy values (numeric vector)
# @param gev GEV parameters: list with elements xi, mu, and sigma
# @return p-value for each energy value (NA where energy is NA)
####################################################################
gevPvalue <- function (energy, gev=list( xi = 1, mu = 0, sigma = 1) )
{
  if (gev$xi == 0) {
    # limit of the GEV for xi -> 0 (Gumbel distribution); the general
    # formula below would divide by zero
    cdf <- exp( - exp( - ((-energy) - gev$mu) / gev$sigma ) )
  } else {
    y <- 1 + (gev$xi * ((-energy) - gev$mu)) / gev$sigma
    # y < 0 means the value is outside the support of the distribution;
    # clamping to 0 yields F = 0 (xi > 0) or F = 1 (xi < 0) there instead of
    # NaN or values outside of [0,1]
    cdf <- exp( - pmax(0, y)^(-1 / gev$xi) )
  }
  return ( 1 - cdf )
}
104+
105+
106+
107+
####################################################################
108+
# parse IntaRNA CSV
109+
####################################################################
110+
111+
# check.names = FALSE keeps the column names exactly as given in the file;
# otherwise read.csv rewrites names like "p-value" to "p.value" and makes
# duplicated names unique, such that the checks below and the written header
# would not refer to the original column names
d <- read.csv( intarnaOutputFile, sep=csvColSep, check.names=FALSE )

# check if energy column present
if (!is.element(colNameE, colnames(d))) {
  stop("'",colNameE,"' is not among the column names of '",intarnaOutputFile,"'", call.=FALSE)
}
# check if unique
if (sum(colnames(d) == colNameE)>1) {
  stop("column name '",colNameE,"' occurs more than once in '",intarnaOutputFile,"'", call.=FALSE)
}


####################################################################
# fit p-values
####################################################################

# get energies to fit
E <- d[[colNameE]]

# energies have to be numbers to be fitted
if (!is.numeric(E)) {
  stop("column '",colNameE,"' of '",intarnaOutputFile,"' does not hold numeric values", call.=FALSE)
}

# fit negated energies
gevfit <- gevFitting(E)

# get rounded pValue
pVal <- round( gevPvalue( E, gevfit ), digits=pValPrec )
# get rounded fdr
fdr <- round( p.adjust(pVal, method="BH"), digits=pValPrec )

####################################################################
# write output
####################################################################

# append the two new columns to the parsed table
o <- cbind( d, pVal, fdr )
colnames(o)[ncol(o)-1] <- "p-value"
colnames(o)[ncol(o)] <- "fdr"

write.table( o, outFile, sep=csvColSep, row.names=FALSE, col.names = TRUE, quote=FALSE )
148+
149+
150+
#################################################################EOF

0 commit comments

Comments
 (0)