Skip to content

Commit 94aa984

Browse files
committed
noLP for seeds with bulges enabled
1 parent 1becda3 commit 94aa984

13 files changed

+279
-79
lines changed

ChangeLog

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212

1313
# IntaRNA
1414
- bugfix generation and tracing of seeds with bulges and no GU ends
15-
- bugfix seed-extension prediction for seeds with bulges
15+
- bugfix seed-extension prediction for seeds with bulges
16+
- noLP for seeds with bulges enabled
1617

1718
# R
1819
- `IntaRNA_CSV_p-value.R` script to estimate p-values based on energy values
@@ -26,6 +27,20 @@
2627
* bugfix generation and tracing of seeds with bulges and no GU ends
2728
* IntaRNA/PredictorMfe*SeedExtension* :
2829
* bugfix enumeration of seeds with bulges
30+
* bin/CommandLineParsing :
31+
* error msgs rephrased
32+
+ noLP for seeds with bulges enabled
33+
+ setup noLP for seed constraints via outNoLP
34+
* IntaRNA/SeedConstraint :
35+
+ isLpAllowed : whether or not lonely base pairs (LPs) are allowed in seeds
36+
* IntaRNA/SeedHandlerMfe :
37+
+ support for noLP constraint
38+
* test/SeedHandlerMfe :
39+
+ test with lp
40+
+ test no lp (boundary)
41+
+ test no lp (internal)
42+
* test/*
43+
* adaptation to SeedConstraint constructor changes
2944

3045
200121 Martin Raden
3146
+ R/IntaRNA_CSV_p-value.R : former addPvalues2csv.R

src/IntaRNA/SeedConstraint.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class SeedConstraint {
3939
* @param explicitSeeds the encodings of explicit seed interactions to be used
4040
* @param noGUallowed whether or not GU base pairs are allowed within seeds
4141
* @param noGUendAllowed whether or not GU base pairs are allowed at the ends of seeds
42+
* @param noLP whether or not lonely base pairs are forbidden within seeds (true = no lonely base pairs allowed)
4243
*/
4344
SeedConstraint( const size_t bp
4445
, const size_t maxUnpairedOverall
@@ -52,6 +53,7 @@ class SeedConstraint {
5253
, const std::string & explicitSeeds
5354
, const bool noGUallowed
5455
, const bool noGUendAllowed
56+
, const bool noLP
5557
);
5658

5759
virtual ~SeedConstraint();
@@ -148,6 +150,13 @@ class SeedConstraint {
148150
bool
149151
isGUendAllowed() const;
150152

153+
/**
154+
* Whether or not lonely base pairs are allowed
155+
* @return true if lonely base pairs are allowed; false otherwise
156+
*/
157+
bool
158+
isLpAllowed() const;
159+
151160
/**
152161
* Index ranges in seq1 to be searched for seeds or empty if all indices
153162
* are to be considered.
@@ -240,6 +249,9 @@ class SeedConstraint {
240249
//! whether or not GU base pairs are allowed at seed ends
241250
bool bpGUendAllowed;
242251

252+
//! whether or not lonely base pairs are allowed
253+
bool lpAllowed;
254+
243255
};
244256

245257

@@ -261,6 +273,7 @@ SeedConstraint::SeedConstraint(
261273
, const std::string & explicitSeeds
262274
, const bool noGUallowed
263275
, const bool noGUendAllowed
276+
, const bool noLP
264277
)
265278
:
266279
bp(bp_)
@@ -275,6 +288,7 @@ SeedConstraint::SeedConstraint(
275288
, explicitSeeds(explicitSeeds)
276289
, bpGUallowed(!noGUallowed)
277290
, bpGUendAllowed(!noGUendAllowed)
291+
, lpAllowed(!noLP)
278292
{
279293
if (bp < 2) throw std::runtime_error("SeedConstraint() : base pair number ("+toString(bp)+") < 2");
280294
}
@@ -440,6 +454,15 @@ isGUendAllowed() const {
440454

441455
/////////////////////////////////////////////////////////////////////////////
442456

457+
inline
458+
bool
459+
SeedConstraint::
460+
isLpAllowed() const {
461+
return lpAllowed;
462+
}
463+
464+
/////////////////////////////////////////////////////////////////////////////
465+
443466
inline
444467
std::ostream&
445468
operator<<(std::ostream& out, const SeedConstraint& c)

src/IntaRNA/SeedHandlerMfe.cpp

Lines changed: 93 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ fillSeed( const size_t i1min, const size_t i1max, const size_t i2min, const size
3737
size_t i1, i2, bpIn, u1, u2, j1, j2, u1p, u2p, k1,k2, u1best, u2best;
3838
E_type curE, bestE;
3939

40+
// determine whether or not lonely base pairs are allowed or if we have to
41+
// ensure a stacking to the right of the left boundary (i1,i2)
42+
const size_t noLpShift = seedConstraint.isLpAllowed() ? 0 : 1;
43+
E_type iStackE = E_type(0);
44+
4045
size_t seedCountNotInf = 0, seedCount = 0;
4146

4247
// fill for all start indices
@@ -75,6 +80,17 @@ fillSeed( const size_t i1min, const size_t i1max, const size_t i2min, const size
7580
// check if this index range is to be considered for seed search
7681
bool validSeedSite = isFeasibleSeedBasePair(j1,j2,true);
7782

83+
// if no LP : check for direct right-stacking of i
84+
if (noLpShift > 0) {
85+
// check if feasible extension
86+
if (isFeasibleSeedBasePair(i1+1,i2+1)) {
87+
// get stacking energy
88+
iStackE = energy.getE_interLeft(i1,i1+1,i2,i2+1);
89+
} else {
90+
validSeedSite = false;
91+
}
92+
}
93+
7894
// init current seed energy
7995
curE = E_INF;
8096

@@ -83,31 +99,47 @@ fillSeed( const size_t i1min, const size_t i1max, const size_t i2min, const size
8399

84100
// base case: only left and right base pair present
85101
if (bpIn==0) {
86-
// energy for stacking/bulge/interior depending on u1/u2
87-
curE = energy.getE_interLeft(i1,j1,i2,j2);
102+
// if lonely bps are allowed or no bulge
103+
if (noLpShift == 0 || (u1==0 && u2==0)) {
104+
// energy for stacking/bulge/interior depending on u1/u2
105+
curE = energy.getE_interLeft(i1,j1,i2,j2);
106+
}
88107

89108
} else {
90-
// split seed recursively into all possible leading interior loops
91-
// i1 .. i1+u1p+1 .. j1
92-
// i2 .. i2+u2p+1 .. j2
93-
for (u1p=1+std::min(u1,energy.getMaxInternalLoopSize1()); u1p-- > 0;) {
94-
for (u2p=1+std::min(u2,energy.getMaxInternalLoopSize2()); u2p-- > 0;) {
95-
96-
k1 = i1+u1p+1;
97-
k2 = i2+u2p+1;
98-
// check if split pair is complementary
99-
// and recursed entry is < E_INF
100-
if (! (isFeasibleSeedBasePair(k1,k2) && E_isNotINF( getSeedE( k1-offset1, k2-offset2, bpIn-1, u1-u1p, u2-u2p ) ) ) ) {
101-
continue; // not complementary -> skip
102-
}
103-
104-
// update mfe for split at k1,k2
105-
curE = std::min( curE,
106-
energy.getE_interLeft(i1,k1,i2,k2)
107-
+ getSeedE( k1-offset1, k2-offset2, bpIn-1, u1-u1p, u2-u2p )
108-
);
109-
} // u2p
110-
} // u1p
109+
110+
// explicitly check direct stacking extension in noLP mode
111+
if (noLpShift > 0 && E_isNotINF( getSeedE( i1+1-offset1, i2+1-offset2, bpIn-1, u1, u2 ) )) {
112+
curE = std::min( curE, iStackE + getSeedE( i1+1-offset1, i2+1-offset2, bpIn-1, u1, u2 ) );
113+
}
114+
115+
// if enough interior base pairs left
116+
if (bpIn >= 1+noLpShift) {
117+
// split seed recursively into all possible leading interior loops
118+
// i1 .. i1+u1p+1 .. j1
119+
// i2 .. i2+u2p+1 .. j2
120+
for (u1p=1+std::min(u1,energy.getMaxInternalLoopSize1()); u1p-- > 0;) {
121+
for (u2p=1+std::min(u2,energy.getMaxInternalLoopSize2()); u2p-- > 0;) {
122+
123+
// skip stacked extension for noLP since already covered above
124+
if (u1p+u2p < noLpShift) { continue; }
125+
126+
k1 = i1+u1p+1+noLpShift;
127+
k2 = i2+u2p+1+noLpShift;
128+
// check if split pair is complementary
129+
// and recursed entry is < E_INF
130+
if (! (isFeasibleSeedBasePair(k1,k2) && E_isNotINF( getSeedE( k1-offset1, k2-offset2, bpIn-1-noLpShift, u1-u1p, u2-u2p ) ) ) ) {
131+
continue; // not complementary -> skip
132+
}
133+
134+
// update mfe for split at k1,k2
135+
curE = std::min( curE,
136+
iStackE
137+
+ energy.getE_interLeft(i1+noLpShift,k1,i2+noLpShift,k2)
138+
+ getSeedE( k1-offset1, k2-offset2, bpIn-1-noLpShift, u1-u1p, u2-u2p )
139+
);
140+
} // u2p
141+
} // u1p
142+
}
111143
} // more than two base pairs
112144

113145
} // (j1,j2) complementary
@@ -215,10 +247,17 @@ traceBackSeed( Interaction & interaction
215247
// get energy of provided seed
216248
E_type curE = getSeedE(i1_,i2_,bpInbetween,u1_,u2_);
217249

250+
// determine whether or not lonely base pairs are allowed or if we have to
251+
// ensure a stacking to the right of the left boundary (i1,i2)
252+
const size_t noLpShift = seedConstraint.isLpAllowed() ? 0 : 1;
253+
E_type iStackE = E_type(0);
254+
218255
// trace seed
219256
// trace each seed base pair (excluding right most)
220257
for( size_t bpIn=1+bpInbetween; bpIn-- > 0; ) {
221258

259+
260+
222261
// base case: only left and right base pair present
223262
if (bpIn==0) {
224263
// add left base pair if not left seed boundary
@@ -227,33 +266,58 @@ traceBackSeed( Interaction & interaction
227266
}
228267

229268
} else {
269+
270+
// if no LP : check for direct right-stacking of i
271+
if (noLpShift > 0) {
272+
// check if feasible extension
273+
assert(isFeasibleSeedBasePair(i1+1,i2+1));
274+
// get stacking energy
275+
iStackE = energy.getE_interLeft(i1+offset1,i1+1+offset1,i2+offset2,i2+1+offset2);
276+
// noLP : check stacking of i
277+
if ( E_equal( curE, iStackE + getSeedE( i1+1, i2+1, bpIn-1, u1max, u2max )) ) {
278+
i1++;
279+
i2++;
280+
curE = getSeedE( i1+1, i2+1, bpIn-1, u1max, u2max );
281+
continue;
282+
}
283+
// sanity check for noLP mode
284+
assert( bpIn >= 1+noLpShift );
285+
}
286+
230287
// split seed recursively into all possible leading interior loops
231288
// i1 .. i1+u1p+1 .. j1
232289
// i2 .. i2+u2p+1 .. j2
233290
bool traceNotFound = true;
234291
for (u1=1+u1max; traceNotFound && u1-- > 0;) {
235292
for (u2=1+u2max; traceNotFound && u2-- > 0;) {
236293
// check if overall number of unpaired is not exceeded
237-
if (u1+u2 > uMax) {
294+
// or skip stacked extension since covered above
295+
if (u1+u2 > uMax || u1+u2 < noLpShift) {
238296
continue;
239297
}
240298

241-
k1 = i1+u1+1;
242-
k2 = i2+u2+1;
299+
k1 = i1+u1+1+noLpShift;
300+
k2 = i2+u2+1+noLpShift;
243301

244302
// check if valid trace
245303
if ( isFeasibleSeedBasePair(k1+offset1, k2+offset2) && E_isNotINF( getSeedE( k1, k2, bpIn-1, u1max-u1, u2max-u2 ) ) ) {
246304

247305
// check if correct trace
248-
if ( E_equal( curE, energy.getE_interLeft(i1+offset1,k1+offset1,i2+offset2,k2+offset2)
249-
+ getSeedE( k1, k2, bpIn-1, u1max-u1, u2max-u2 )) )
306+
if ( E_equal( curE, iStackE
307+
+ energy.getE_interLeft(i1+noLpShift+offset1,k1+offset1,i2+noLpShift+offset2,k2+offset2)
308+
+ getSeedE( k1, k2, bpIn-1-noLpShift, u1max-u1, u2max-u2 )) )
250309
{
251310
// store left base pair if not left seed boundary
252311
if (i1 != i1_) {
253312
interaction.basePairs.push_back( energy.getBasePair(i1+offset1,i2+offset2) );
254313
}
314+
if (noLpShift > 0) {
315+
interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift+offset1,i2+noLpShift+offset2) );
316+
// reflect additional base pair
317+
bpIn--;
318+
}
255319
// store next energy value to trace
256-
curE = getSeedE( k1, k2, bpIn-1, u1max-u1, u2max-u2 );
320+
curE = getSeedE( k1, k2, bpIn-1-noLpShift, u1max-u1, u2max-u2 );
257321
// reset for next trace step
258322
i1 = k1;
259323
i2 = k2;

src/bin/CommandLineParsing.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2382,6 +2382,7 @@ getSeedConstraint( const InteractionEnergy & energy ) const
23822382
, seedTQ
23832383
, seedNoGU
23842384
, seedNoGUend
2385+
, outNoLP
23852386
);
23862387
}
23872388
return *seedConstraint;
@@ -2402,9 +2403,6 @@ getSeedHandler( const InteractionEnergy & energy ) const
24022403
} else {
24032404
// check if we have to allow for bulges in seed
24042405
if (seedConstr.getMaxUnpaired1()+seedConstr.getMaxUnpaired2()+seedConstr.getMaxUnpairedOverall() > 0) {
2405-
if (outNoLP) {
2406-
INTARNA_NOT_IMPLEMENTED("outNoLP not yet implemented for seeds with bulges");
2407-
}
24082406
// create new seed handler using mfe computation
24092407
return new SeedHandlerMfe( energy, seedConstr );
24102408
} else {

tests/HelixHandlerNoBulgeMaxSeedIdxOffset_test.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ TEST_CASE( "HelixHandlerIdxOffset for NoBulgeMaxSeed", "[HelixHandlerIdxOffset]"
3030

3131
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
3232
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
33-
"", false, false);
33+
"", false, false, false);
3434

3535
HelixHandler *hhS = new HelixHandlerNoBulgeMax(energy, hC);
3636
SeedHandler *sH = new SeedHandlerMfe(energy, sC);
@@ -128,7 +128,7 @@ TEST_CASE( "HelixHandlerIdxOffset for NoBulgeMaxSeed", "[HelixHandlerIdxOffset]"
128128

129129
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
130130
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
131-
"", false, false);
131+
"", false, false, false);
132132

133133
HelixHandler *hhS = new HelixHandlerNoBulgeMax(energy, hC);
134134
SeedHandler *sH = new SeedHandlerMfe(energy, sC);
@@ -192,7 +192,7 @@ TEST_CASE( "HelixHandlerIdxOffset for NoBulgeMaxSeed", "[HelixHandlerIdxOffset]"
192192

193193
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
194194
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
195-
"", false, false);
195+
"", false, false, false);
196196

197197
HelixHandler *hhS = new HelixHandlerNoBulgeMax(energy, hC);
198198
SeedHandler *sH = new SeedHandlerMfe(energy, sC);
@@ -268,7 +268,7 @@ TEST_CASE( "HelixHandlerIdxOffset for NoBulgeMaxSeed", "[HelixHandlerIdxOffset]"
268268

269269
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
270270
SeedConstraint sC(3, 2, 1, 1, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
271-
"", false, false);
271+
"", false, false, false);
272272

273273
HelixHandler *hhS = new HelixHandlerNoBulgeMax(energy, hC);
274274
SeedHandler *sH = new SeedHandlerMfe(energy, sC);
@@ -364,7 +364,7 @@ TEST_CASE( "HelixHandlerIdxOffset for NoBulgeMaxSeed", "[HelixHandlerIdxOffset]"
364364

365365
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
366366
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
367-
"", false, false);
367+
"", false, false, false);
368368

369369
HelixHandler *hhS = new HelixHandlerNoBulgeMax(energy, hC);
370370
SeedHandler *sH = new SeedHandlerMfe(energy, sC);

tests/HelixHandlerNoBulgeMaxSeed_test.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ TEST_CASE( "HelixHandlerNoBulgeMaxSeed", "[HelixHandlerNoBulgeMax]" ) {
3030
, AccessibilityDisabled::ED_UPPER_BOUND, 0
3131
, IndexRangeList("")
3232
, IndexRangeList("")
33-
, "", false, false );
33+
, "", false, false, false );
3434

3535
SeedHandlerMfe sH(energy, sC);
3636
HelixHandlerNoBulgeMax hhS(energy, hC);
@@ -118,7 +118,7 @@ TEST_CASE( "HelixHandlerNoBulgeMaxSeed", "[HelixHandlerNoBulgeMax]" ) {
118118

119119
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
120120
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
121-
"", false, false);
121+
"", false, false, false);
122122

123123
SeedHandlerMfe sH(energy, sC);
124124
HelixHandlerNoBulgeMax hhS(energy, hC);
@@ -172,7 +172,7 @@ TEST_CASE( "HelixHandlerNoBulgeMaxSeed", "[HelixHandlerNoBulgeMax]" ) {
172172

173173
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
174174
SeedConstraint sC(3, 0, 0, 0, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
175-
"", false, false);
175+
"", false, false, false);
176176

177177
SeedHandlerMfe sH(energy, sC);
178178
HelixHandlerNoBulgeMax hhS(energy, hC);
@@ -239,7 +239,7 @@ TEST_CASE( "HelixHandlerNoBulgeMaxSeed", "[HelixHandlerNoBulgeMax]" ) {
239239

240240
// seedBP / seedMaxUP / seedTMaxUP / seedQMaxUP / seedMaxE / seedMaxED / seedTRange / seedQRange / seedTQ / seedNoGU
241241
SeedConstraint sC(3, 2, 1, 1, 0, AccessibilityDisabled::ED_UPPER_BOUND, 0, IndexRangeList(""), IndexRangeList(""),
242-
"", false, false);
242+
"", false, false, false);
243243

244244
SeedHandlerMfe sH(energy, sC);
245245
HelixHandlerNoBulgeMax hhS(energy, hC);

0 commit comments

Comments
 (0)