|
| 1 | +/* BranchTypeIdentification.cpp |
| 2 | + Author: Maxie D. Schmidt ([email protected]) |
| 3 | + Created: 2018.06.16 |
| 4 | +*/ |
| 5 | + |
| 6 | +#include <stdio.h> |
| 7 | +#include <stdlib.h> |
| 8 | +#include <string.h> |
| 9 | + |
| 10 | +#include <vector> |
| 11 | +#include <algorithm> |
| 12 | +using std::vector; |
| 13 | +using std::sort; |
| 14 | + |
| 15 | +#include "BranchTypeIdentification.h" |
| 16 | +//#include "Utils.h" |
| 17 | + |
| 18 | +vector<RNAStructure::BaseData *> RNABranchType_t::getEnclosingArcs( |
| 19 | + RNAStructure * &rnaStructBase, bool removeTopFour = false) { |
| 20 | + vector<RNAStructure::BaseData *> rBaseDataVec; |
| 21 | + for(int bd = 0; bd < rnaStructBase->GetLength(); bd++) { |
| 22 | + bool isAlreadyEnclosed = false; |
| 23 | + RNAStructure::BaseData *curBaseData = rnaStructBase->GetBaseAt(bd); |
| 24 | + for(int v = 0; v < rBaseDataVec.size(); v++) { |
| 25 | + if(curBaseData->m_pair == RNAStructure::UNPAIRED) { |
| 26 | + isAlreadyEnclosed = true; |
| 27 | + break; |
| 28 | + } |
| 29 | + else if(curBaseData->isContainedIn(*(rBaseDataVec[v]))) { |
| 30 | + isAlreadyEnclosed = true; |
| 31 | + break; |
| 32 | + } |
| 33 | + else if(rBaseDataVec[v]->isContainedIn(*curBaseData)) { |
| 34 | + rBaseDataVec[v] = curBaseData; |
| 35 | + isAlreadyEnclosed = true; |
| 36 | + break; |
| 37 | + } |
| 38 | + } |
| 39 | + if(!isAlreadyEnclosed && (rBaseDataVec.size() > 0 || curBaseData->m_pair != RNAStructure::UNPAIRED)) { |
| 40 | + rBaseDataVec.push_back(curBaseData); |
| 41 | + } |
| 42 | + } |
| 43 | + sort(rBaseDataVec.begin(), rBaseDataVec.end(), RNAStructureBaseDataArcLengthSort()); |
| 44 | + if(removeTopFour) { |
| 45 | + rBaseDataVec.erase(rBaseDataVec.begin(), rBaseDataVec.begin() + 4); |
| 46 | + } |
| 47 | + return rBaseDataVec; |
| 48 | +} |
| 49 | + |
| 50 | +RNABranchType_t::RNABranchType_t(BranchID_t bid = BRANCH_UNDEFINED, class RNAStructure::BaseData *bparent = NULL) { |
| 51 | + branchID = bid; |
| 52 | + branchParent = bparent; |
| 53 | +} |
| 54 | + |
| 55 | +RNABranchType_t & RNABranchType_t::operator=(const BranchID_t &rhs) { |
| 56 | + setBranchID(rhs); |
| 57 | + branchParent = NULL; |
| 58 | + return *this; |
| 59 | +} |
| 60 | + |
| 61 | +bool RNABranchType_t::operator==(const RNABranchType_t &rhs) const { |
| 62 | + return branchID == rhs.branchID && branchParent == rhs.branchParent; |
| 63 | +} |
| 64 | + |
| 65 | +bool RNABranchType_t::operator==(const BranchID_t &rhs) const { |
| 66 | + return branchID == rhs; |
| 67 | +} |
| 68 | + |
| 69 | +const BranchID_t & RNABranchType_t::getBranchID() const { |
| 70 | + return branchID; |
| 71 | +} |
| 72 | + |
| 73 | +void RNABranchType_t::setBranchID(BranchID_t bid) { |
| 74 | + branchID = bid; |
| 75 | +} |
| 76 | + |
| 77 | +const RNAStructure::BaseData* RNABranchType_t::getBranchParent() const { |
| 78 | + return branchParent; |
| 79 | +} |
| 80 | + |
| 81 | +void RNABranchType_t::setBranchParent(class RNAStructure::BaseData* bparent) { |
| 82 | + branchParent = bparent; |
| 83 | +} |
| 84 | + |
| 85 | +void RNABranchType_t::SetBranchColor(cairo_t * &cr, BranchID_t bt) { |
| 86 | + switch(bt) { |
| 87 | + case BRANCH_UNDEFINED: |
| 88 | + cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); |
| 89 | + break; |
| 90 | + case BRANCH1: |
| 91 | + cairo_set_source_rgb(cr, 92.0 / 255, 160.0 / 255, 215.0 / 255); |
| 92 | + break; |
| 93 | + case BRANCH2: |
| 94 | + cairo_set_source_rgb(cr, 183.0 / 255, 127.0 / 255, 77.0 / 255); |
| 95 | + break; |
| 96 | + case BRANCH3: |
| 97 | + cairo_set_source_rgb(cr, 243.0 / 255, 153.0 / 255, 193.0 / 255); |
| 98 | + break; |
| 99 | + case BRANCH4: |
| 100 | + cairo_set_source_rgb(cr, 123.0 / 255, 204.0 / 255, 153.0 / 255); |
| 101 | + break; |
| 102 | + default: |
| 103 | + break; |
| 104 | + } |
| 105 | +} |
| 106 | + |
| 107 | +bool RNABranchType_t::PerformBranchClassification(class RNAStructure * &rnaStructBase, unsigned int alength) { |
| 108 | + |
| 109 | + if(alength < 4) |
| 110 | + return false; |
| 111 | + |
| 112 | + // first we determine the four most enclosing arcs on the circle: |
| 113 | + RNAStructure::BaseData* mostEnclosingArcs[4] = {NULL, NULL, NULL, NULL}; |
| 114 | + unsigned int mostEnclosingArcsSize = 0; |
| 115 | + for(int rs = 0; rs < alength; rs++) { |
| 116 | + //fprintf(stderr, "[1] rs=%d\n", rs); |
| 117 | + RNAStructure::BaseData* rnaStruct = rnaStructBase->GetBaseAt(rs); |
| 118 | + if(rnaStruct->m_pair == RNAStructure::UNPAIRED) |
| 119 | + continue; |
| 120 | + else if(mostEnclosingArcsSize == 0) { |
| 121 | + mostEnclosingArcs[0] = rnaStructBase->GetBaseAt(rs); |
| 122 | + //fprintf(stderr, "mostEnclosingArcs[0]=%p\n", mostEnclosingArcs[0]); |
| 123 | + mostEnclosingArcsSize++; |
| 124 | + continue; |
| 125 | + } |
| 126 | + bool isEnclosedInLargerBranch = false; |
| 127 | + for(int mea = 0; mea < mostEnclosingArcsSize; mea++) { |
| 128 | + if(rnaStruct->isContainedIn(*(mostEnclosingArcs[mea]))) { |
| 129 | + isEnclosedInLargerBranch = true; |
| 130 | + //rnaStructBase->GetBranchTypeAt(rs).setBranchID((BranchID_t) (mea + 1)); |
| 131 | + //rnaStructBase->GetBranchTypeAt(rs).setBranchParent(mostEnclosingArcs[mea]); |
| 132 | + break; |
| 133 | + } |
| 134 | + } |
| 135 | + if(isEnclosedInLargerBranch) |
| 136 | + continue; // this cannot be an outer containing branch |
| 137 | + RNAStructure::BaseData *currentBaseData = rnaStructBase->GetBaseAt(rs);; |
| 138 | + unsigned int pairDistance = ABS(MAX(currentBaseData->m_pair, currentBaseData->m_index) - |
| 139 | + MIN(currentBaseData->m_pair, currentBaseData->m_index)); |
| 140 | + for(int mea = 0; mea < mostEnclosingArcsSize; mea++) { |
| 141 | + RNAStructure::BaseData *meaBaseData = mostEnclosingArcs[mea]; |
| 142 | + unsigned int meaPairDistance = ABS(MAX(meaBaseData->m_pair, meaBaseData->m_index) - |
| 143 | + MIN(meaBaseData->m_pair, meaBaseData->m_index)); |
| 144 | + bool needToResort = false; |
| 145 | + if(meaPairDistance < pairDistance && meaBaseData->m_pair > meaBaseData->m_index) { |
| 146 | + //fprintf(stderr, "[2] meaPD=%d, PD=%d\n", meaPairDistance, pairDistance); |
| 147 | + if(mostEnclosingArcsSize < 4) { |
| 148 | + mostEnclosingArcs[mostEnclosingArcsSize] = mostEnclosingArcs[mea]; |
| 149 | + mostEnclosingArcsSize++; |
| 150 | + needToResort = true; |
| 151 | + } |
| 152 | + mostEnclosingArcs[mea] = rnaStructBase->GetBaseAt(rs); |
| 153 | + //rnaStructBase->GetBranchTypeAt(rs)->setBranchID((BranchID_t) (mea + 1)); |
| 154 | + if(needToResort) { |
| 155 | + qsort(&mostEnclosingArcs[0], mostEnclosingArcsSize, |
| 156 | + sizeof(RNAStructure::BaseData*), compareMostEnclosingArcs); |
| 157 | + } |
| 158 | + break; |
| 159 | + } |
| 160 | + } |
| 161 | + } |
| 162 | + // sort the identified branches according to their natural order on the circle: |
| 163 | + IntIndexPair_t branchOrderMappings[mostEnclosingArcsSize]; |
| 164 | + for(int i = 0; i < mostEnclosingArcsSize; i++) { |
| 165 | + branchOrderMappings[i].intValue = MIN(mostEnclosingArcs[i]->m_index, mostEnclosingArcs[i]->m_pair); |
| 166 | + branchOrderMappings[i].index = i; |
| 167 | + } |
| 168 | + qsort(&branchOrderMappings[0], mostEnclosingArcsSize, sizeof(IntIndexPair_t), compareIntegerIndexPair); |
| 169 | + RNAStructure::BaseData* mostEnclosingArcsTemp[4]; |
| 170 | + for(int j = 0; j < mostEnclosingArcsSize; j++) { |
| 171 | + mostEnclosingArcsTemp[j] = mostEnclosingArcs[branchOrderMappings[j].index]; |
| 172 | + } |
| 173 | + for(int k = 0; k < mostEnclosingArcsSize; k++) { |
| 174 | + mostEnclosingArcs[k] = mostEnclosingArcsTemp[k]; |
| 175 | + } |
| 176 | + // now that we've identified most of the the enclosing branches, |
| 177 | + // we reset the branch types by number on all (except for the nubbins, |
| 178 | + // see below) entries in the array: |
| 179 | + vector<RNAStructure::BaseData *> enclosingArcs = getEnclosingArcs(rnaStructBase, false); |
| 180 | + sort(enclosingArcs.begin(), enclosingArcs.begin() + 4, RNAStructureBaseDataIndexSort()); |
| 181 | + if(enclosingArcs.size() < 7 || mostEnclosingArcsSize < 4) { |
| 182 | + fprintf(stderr, "HUGE LOGISTICAL ERROR: There are not 7 / 4 main arcs / branches to classify!\n"); |
| 183 | + return false; |
| 184 | + } |
| 185 | + // handle labeling of the arc nubbins (and contained pairs) to the sides of the big arcs: |
| 186 | + vector<RNAStructure::BaseData *> enclosingArcsNubbins(enclosingArcs.begin() + 4, enclosingArcs.begin() + 7); |
| 187 | + sort(enclosingArcsNubbins.begin(), enclosingArcsNubbins.end(), RNAStructureBaseDataIndexSort()); |
| 188 | + BranchID_t nubbinBranchIDs[3] = {BRANCH1, BRANCH2, BRANCH4}; |
| 189 | + for(int n = 0; n < 3; n++) { |
| 190 | + rnaStructBase->GetBranchTypeAt(enclosingArcsNubbins[n]->m_index)->setBranchID(nubbinBranchIDs[n]); |
| 191 | + rnaStructBase->GetBranchTypeAt(enclosingArcsNubbins[n]->m_index)->setBranchParent(enclosingArcs[(int) (nubbinBranchIDs[n] - 1)]); |
| 192 | + rnaStructBase->GetBranchTypeAt(enclosingArcsNubbins[n]->m_pair)->setBranchID(nubbinBranchIDs[n]); |
| 193 | + rnaStructBase->GetBranchTypeAt(enclosingArcsNubbins[n]->m_pair)->setBranchParent(enclosingArcs[(int) (nubbinBranchIDs[n] - 1)]); |
| 194 | + for(int b = 0; b < alength; b++) { |
| 195 | + RNAStructure::BaseData *curBase = rnaStructBase->GetBaseAt(b); |
| 196 | + if(curBase->isContainedIn(*(enclosingArcsNubbins[n])) || |
| 197 | + curBase->m_pair == RNAStructure::UNPAIRED && |
| 198 | + curBase->m_index < MAX(enclosingArcsNubbins[n]->m_index, enclosingArcsNubbins[n]->m_pair) && |
| 199 | + curBase->m_index > MIN(enclosingArcsNubbins[n]->m_index, enclosingArcsNubbins[n]->m_pair)) { |
| 200 | + rnaStructBase->GetBranchTypeAt(b)->setBranchID(nubbinBranchIDs[n]); |
| 201 | + rnaStructBase->GetBranchTypeAt(b)->setBranchParent(enclosingArcs[(int) (nubbinBranchIDs[n] - 1)]); |
| 202 | + } |
| 203 | + } |
| 204 | + } |
| 205 | + // handle labeling of the big arcs (and contained pairs and unpaired components contained within): |
| 206 | + for(int k = 0; k < 4; k++) { |
| 207 | + rnaStructBase->GetBranchTypeAt(enclosingArcs[k]->m_index)->setBranchID((BranchID_t) (k + 1)); |
| 208 | + rnaStructBase->GetBranchTypeAt(enclosingArcs[k]->m_index)->setBranchParent(enclosingArcs[k]); |
| 209 | + rnaStructBase->GetBranchTypeAt(enclosingArcs[k]->m_pair)->setBranchID((BranchID_t) (k + 1)); |
| 210 | + rnaStructBase->GetBranchTypeAt(enclosingArcs[k]->m_pair)->setBranchParent(enclosingArcs[k]); |
| 211 | + for(int b = 0; b < alength; b++) { |
| 212 | + RNAStructure::BaseData *curBase = rnaStructBase->GetBaseAt(b); |
| 213 | + if(curBase->isContainedIn(*(enclosingArcs[k])) || |
| 214 | + (curBase->m_pair == RNAStructure::UNPAIRED && curBase->m_index < MAX(enclosingArcs[k]->m_index, enclosingArcs[k]->m_pair) && |
| 215 | + curBase->m_index > MIN(enclosingArcs[k]->m_index, enclosingArcs[k]->m_pair))) { |
| 216 | + rnaStructBase->GetBranchTypeAt(b)->setBranchID((BranchID_t) (k + 1)); |
| 217 | + rnaStructBase->GetBranchTypeAt(b)->setBranchParent(enclosingArcs[k]); |
| 218 | + } |
| 219 | + } |
| 220 | + } |
| 221 | + // handle labeling of the unpaired components in between the big arcs: |
| 222 | + for(int b = 0; b < 4; b++) { |
| 223 | + int unpairedBetweenBranchCount = 0; |
| 224 | + vector<RNAStructure::BaseData *> unpairedBetweenVec; |
| 225 | + for(int v = 0; v < alength; v++) { |
| 226 | + RNAStructure::BaseData *curUnpaired = rnaStructBase->GetBaseAt(v); |
| 227 | + if(curUnpaired->isContainedIn(*(enclosingArcs[b])) || |
| 228 | + curUnpaired->m_index == enclosingArcs[b]->m_pair && curUnpaired->m_pair == enclosingArcs[b]->m_index) { |
| 229 | + rnaStructBase->GetBranchTypeAt(v)->setBranchID((BranchID_t) (b + 1)); |
| 230 | + rnaStructBase->GetBranchTypeAt(v)->setBranchParent(enclosingArcs[b]); |
| 231 | + } |
| 232 | + else if(curUnpaired->m_pair == RNAStructure::UNPAIRED && |
| 233 | + curUnpaired->m_index > MAX(enclosingArcs[b]->m_pair, enclosingArcs[b]->m_index) && (b == 3 || |
| 234 | + curUnpaired->m_index < MIN(enclosingArcs[b + 1]->m_pair, enclosingArcs[b + 1]->m_index))) { // we want the unpaired bases between branches <- : |
| 235 | + unpairedBetweenBranchCount++; |
| 236 | + unpairedBetweenVec.push_back(curUnpaired); |
| 237 | + } |
| 238 | + } |
| 239 | + for(int up = 0; up < unpairedBetweenVec.size(); up++) { |
| 240 | + if(b < 3 && up <= unpairedBetweenVec.size() / 2) { |
| 241 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchID((BranchID_t) (b + 1)); |
| 242 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchParent(enclosingArcs[b]); |
| 243 | + } |
| 244 | + else if(b < 3) { |
| 245 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchID((BranchID_t) (b + 2)); |
| 246 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchParent(enclosingArcs[b + 1]); |
| 247 | + } |
| 248 | + else { |
| 249 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchID(BRANCH4); |
| 250 | + rnaStructBase->GetBranchTypeAt(unpairedBetweenVec[up]->m_index)->setBranchParent(enclosingArcs[3]); |
| 251 | + } |
| 252 | + } |
| 253 | + } |
| 254 | + // handle labeling of the unpaired components before the first big arc: |
| 255 | + for(int up = 0; up < alength; up++) { |
| 256 | + RNAStructure::BaseData *curUnpaired = rnaStructBase->GetBaseAt(up); |
| 257 | + if(curUnpaired->m_index < MIN(enclosingArcs[0]->m_index, enclosingArcs[0]->m_pair) && curUnpaired->m_pair == RNAStructure::UNPAIRED) { |
| 258 | + rnaStructBase->GetBranchTypeAt(up)->setBranchID(BRANCH1); |
| 259 | + rnaStructBase->GetBranchTypeAt(up)->setBranchParent(enclosingArcs[0]); |
| 260 | + } |
| 261 | + } |
| 262 | + return true; |
| 263 | +} |
0 commit comments