Commit 8bc682c

update code style
1 parent 7e88470 commit 8bc682c


R/bambu-extendAnnotations-utilityExtend.R

Lines changed: 100 additions & 96 deletions
@@ -23,7 +23,9 @@ isore.extendAnnotations <- function(combinedTranscripts, annotationGrangesList,
 rowDataSplicedTibble, annotationGrangesList,
 min.exonDistance, min.primarySecondaryDist,
 min.primarySecondaryDistStartEnd, verbose)
-} else{ rowDataFilteredSpliced = NULL}
+} else{
+    rowDataFilteredSpliced <- NULL
+}
 rowDataFilteredUnspliced <- rowDataTibble[which(confidenceTypeVec == "unsplicedNew"),]
 SEnRng <- addNewUnsplicedReadClasses(rowDataFilteredUnspliced,
 rowDataFilteredSpliced, transcriptRanges$exons,
@@ -98,19 +100,19 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList
 rowDataCombined[!notCompatibleIds,],
 exonRangesCombined[!notCompatibleIds])
 rowDataCombined$maxTxScore[grepl("compatible", rowDataCombined$readClassType) &
-rowDataCombined$readClassType != "equal:compatible"]=-1
+rowDataCombined$readClassType != "equal:compatible"] <- -1
 rowDataCombined$maxTxScore.noFit[grepl("compatible", rowDataCombined$readClassType) &
-rowDataCombined$readClassType != "equal:compatible"]=-1
+rowDataCombined$readClassType != "equal:compatible"] <- -1
 }
 #(2) remove transcripts below NDR threshold/identical junctions to annotations
-rowDataCombined = calculateNDROnTranscripts(rowDataCombined,
+rowDataCombined <- calculateNDROnTranscripts(rowDataCombined,
 useTxScore = length(annotationGrangesList)==0)
 if(length(annotationGrangesList)>0){ #only recommend an NDR if its possible to calculate an NDR
-NDR = recommendNDR(rowDataCombined, baselineFDR, NDR, defaultModels, verbose)
-} else {
-if(is.null(NDR)) NDR = 0.5
+NDR <- recommendNDR(rowDataCombined, baselineFDR, NDR, defaultModels, verbose)
+} else if(is.null(NDR)) {
+NDR <- 0.5
 }
-filterSet = (rowDataCombined$NDR <= NDR | rowDataCombined$readClassType == "equal:compatible")
+filterSet <- (rowDataCombined$NDR <= NDR | rowDataCombined$readClassType == "equal:compatible")
 lowConfidenceTranscripts <- combindRowDataWithRanges(
 rowDataCombined[!filterSet,],
 exonRangesCombined[!filterSet])
@@ -156,21 +158,21 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList
 #' @noRd
 recommendNDR <- function(combinedTranscripts, baselineFDR = 0.1, NDR = NULL, defaultModels = defaultModels, verbose = FALSE){
 if(verbose) message("-- Predicting annotation completeness to determine NDR threshold --")
-combinedTranscripts = combinedTranscripts[combinedTranscripts$maxTxScore.noFit >=0, ] #ignore filtered out read classes
-equal = combinedTranscripts$readClassType == "equal:compatible"
-equal[is.na(equal)] = FALSE
+combinedTranscripts <- combinedTranscripts[combinedTranscripts$maxTxScore.noFit >=0, ] #ignore filtered out read classes
+equal <- combinedTranscripts$readClassType == "equal:compatible"
+equal[is.na(equal)] <- FALSE
 #add envirnment so poly() works
 attr(defaultModels$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv()))
-baseline = predict(defaultModels$lmNDR, newdata=data.frame(NDR=baselineFDR))
-attr(defaultModels$lmNDR[["terms"]], ".Environment") = c()
-
-score = combinedTranscripts$maxTxScore.noFit
-score[is.na(score)] = 0
-NDRscores = calculateNDR(score, equal)
-NDR.rec = predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline))
-NDR.rec = round(NDR.rec,3)
-if(NDR.rec > 1){NDR.rec = 0.999}
-if (NDR.rec < 0) {NDR.rec = 0}
+baseline <- predict(defaultModels$lmNDR, newdata=data.frame(NDR=baselineFDR))
+attr(defaultModels$lmNDR[["terms"]], ".Environment") <- c()
+
+score <- combinedTranscripts$maxTxScore.noFit
+score[is.na(score)] <- 0
+NDRscores <- calculateNDR(score, equal)
+NDR.rec <- predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline))
+NDR.rec <- round(NDR.rec,3)
+if(NDR.rec > 1) NDR.rec <- 0.999
+if (NDR.rec < 0) NDR.rec <- 0
 if(verbose) message("Recommended NDR for baseline FDR of ", baselineFDR, " = ", NDR.rec)
 if(NDR.rec > 0.5){
 message("A high NDR threshold is being recommended by Bambu indicating high levels of novel transcripts, ",
@@ -181,33 +183,29 @@ recommendNDR <- function(combinedTranscripts, baselineFDR = 0.1, NDR = NULL, def
 }
 
 #if users are using an NDR let them know if the recommended NDR is different
-if(is.null(NDR))
-{
-NDR = NDR.rec
+if(is.null(NDR)) {
+NDR <- NDR.rec
 message("Using a novel discovery rate (NDR) of: ", NDR)
-}
-else{
-if(abs(NDR.rec-NDR)>=0.1){
+} else if(abs(NDR.rec-NDR)>=0.1){
 message(paste0("For your combination of sample and reference annotations we recommend an NDR of ", NDR.rec,
 ". You are currently using an NDR threshold of ", NDR,
 ". A higher NDR is suited for samples where the reference annotations are poor and more novel transcripts are expected,",
 "whereas a lower NDR is suited for samples with already high quality annotations"))
-}
 }
 return(NDR)
 }
 
-recommendNDR.onAnnotations = function(annotations, prefix = "Bambu", baselineFDR = 0.1, defaultModels2 = defaultModels2){
-mcols = mcols(annotations)[!is.na(mcols(annotations)$maxTxScore),]
-equal = !grepl(prefix, mcols$TXNAME)
+recommendNDR.onAnnotations <- function(annotations, prefix = "Bambu", baselineFDR = 0.1, defaultModels2 = defaultModels2){
+mcols <- mcols(annotations)[!is.na(mcols(annotations)$maxTxScore),]
+equal <- !grepl(prefix, mcols$TXNAME)
 #add envirnment so poly() works
 attr(defaultModels2$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv()))
-baseline = predict(defaultModels2$lmNDR, newdata=data.frame(NDR=baselineFDR))
-attr(defaultModels2$lmNDR[["terms"]], ".Environment") = c()
-score = mcols$maxTxScore.noFit
-NDRscores = calculateNDR(score, equal)
-NDR.rec = predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline))
-NDR.rec = round(NDR.rec,3)
+baseline <- predict(defaultModels2$lmNDR, newdata=data.frame(NDR=baselineFDR))
+attr(defaultModels2$lmNDR[["terms"]], ".Environment") <- c()
+score <- mcols$maxTxScore.noFit
+NDRscores <- calculateNDR(score, equal)
+NDR.rec <- predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline))
+NDR.rec <- round(NDR.rec,3)
 return(NDR.rec)
 }
 
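As context for the two functions above: recommendNDR translates a baseline FDR into a sample-specific NDR threshold by fitting a cubic polynomial of the observed NDR values against the transcript model scores. A rough sketch on synthetic data (the score and label vectors below are invented, and the pretrained defaultModels$lmNDR lookup is replaced by a hard-coded baseline score):

# Sketch only: synthetic inputs, using calculateNDR() defined later in this file.
set.seed(1)
score <- runif(200)                      # stand-in for maxTxScore.noFit
equal <- runif(200) < score              # stand-in for "equal:compatible" labels
NDRscores <- calculateNDR(score, equal)  # observed NDR at each score
fit <- lm(NDRscores ~ poly(score, 3, raw = TRUE))
baseline <- 0.7                          # real code: predict(defaultModels$lmNDR, newdata = data.frame(NDR = baselineFDR))
round(predict(fit, newdata = data.frame(score = baseline)), 3)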
@@ -216,25 +214,27 @@ recommendNDR.onAnnotations = function(annotations, prefix = "Bambu", baselineFDR
 #' @noRd
 calculateNDROnTranscripts <- function(combinedTranscripts, useTxScore = FALSE){
 # calculate and filter by NDR
-equal = combinedTranscripts$readClassType == "equal:compatible"
-equal[is.na(equal)] = FALSE
+equal <- combinedTranscripts$readClassType == "equal:compatible"
+equal[is.na(equal)] <- FALSE
 if(sum(equal, na.rm = TRUE)<50 | sum(!equal, na.rm = TRUE)<50 | useTxScore){
-combinedTranscripts$NDR = 1 - combinedTranscripts$maxTxScore
+combinedTranscripts$NDR <- 1 - combinedTranscripts$maxTxScore
 if(!useTxScore) message("WARNING - Less than 50 TRUE or FALSE read classes ",
 "for NDR precision stabilization.")
 message("NDR will be approximated as: (1 - Transcript Model Prediction Score)")
-} else combinedTranscripts$NDR = calculateNDR(combinedTranscripts$maxTxScore, equal)
-combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] = 1
+} else {
+combinedTranscripts$NDR <- calculateNDR(combinedTranscripts$maxTxScore, equal)
+}
+combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- 1
 return(combinedTranscripts)
 }
 
 #' calculates the minimum NDR for each score
 #' @noRd
-calculateNDR = function(score, labels){
-scoreOrder = order(score, decreasing = TRUE)
-labels = labels[scoreOrder]
-NDR = cumsum(!labels)/(seq_len(length(labels))) #calculate NDR
-NDR = rev(cummin(rev(NDR))) #flatten NDR so its never higher than a lower ranked RC
+calculateNDR <- function(score, labels){
+scoreOrder <- order(score, decreasing = TRUE)
+labels <- labels[scoreOrder]
+NDR <- cumsum(!labels)/(seq_len(length(labels))) #calculate NDR
+NDR <- rev(cummin(rev(NDR))) #flatten NDR so its never higher than a lower ranked RC
 return(NDR[order(scoreOrder)]) #return to original order
 }
 
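A toy run of calculateNDR may make the ranking clearer; the scores and labels below are made up (labels flag read classes that are equal to an annotated transcript):

# Read classes are ranked by score; each one gets the fraction of non-annotated
# classes at or above its rank, flattened so it never exceeds a lower-ranked class.
score  <- c(0.95, 0.90, 0.40, 0.80, 0.10)
labels <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
calculateNDR(score, labels)
# 0, 0, 0.5, 0.333..., 0.6  (returned in the original order of 'score')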
@@ -245,8 +245,9 @@ calculateNDR = function(score, labels){
 #' @noRd
 makeExonsIntronsSpliced <- function(transcriptsTibble,annotationSeqLevels){
 if(all(is.na(transcriptsTibble$intronStarts))){
-intronsByReadClass = GRangesList()}
-else { intronsByReadClass <- makeGRangesListFromFeatureFragments(
+intronsByReadClass <- GRangesList()
+} else {
+intronsByReadClass <- makeGRangesListFromFeatureFragments(
 seqnames = transcriptsTibble$chr,
 fragmentStarts = transcriptsTibble$intronStarts,
 fragmentEnds = transcriptsTibble$intronEnds,
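The hunk above is cut off mid-call; for context, makeGRangesListFromFeatureFragments() from GenomicRanges builds one GRanges per read class out of comma-separated coordinate strings. A minimal call of the same shape, with made-up coordinates:

# Hypothetical values; in the real call the strings come from transcriptsTibble.
library(GenomicRanges)
makeGRangesListFromFeatureFragments(
    seqnames       = c("chr1", "chr1"),
    fragmentStarts = c("1100,2100", "5100"),  # intronStarts: one comma-separated string per read class
    fragmentEnds   = c("1900,2900", "5900"),  # intronEnds
    strand         = c("+", "-"))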
@@ -686,7 +687,7 @@ combindRowDataWithRanges <- function(rowDataCombinedFiltered, exonRangesCombined
 #' @noRd
 combineWithAnnotations <- function(rowDataCombinedFiltered,
 extendedAnnotationRanges,annotationGrangesList, prefix){
-equalRanges = rowDataCombinedFiltered[!(rowDataCombinedFiltered$novelTranscript),]
+equalRanges <- rowDataCombinedFiltered[!(rowDataCombinedFiltered$novelTranscript),]
 #remove extended ranges that are already present in annotation
 extendedAnnotationRanges <- extendedAnnotationRanges[rowDataCombinedFiltered$novelTranscript]
 annotationRangesToMerge <- annotationGrangesList
@@ -700,12 +701,12 @@ combineWithAnnotations <- function(rowDataCombinedFiltered,
 mcols(annotationRangesToMerge)$maxTxScore.noFit <- NA
 mcols(extendedAnnotationRanges) <- mcols(extendedAnnotationRanges)[,colnames(mcols(extendedAnnotationRanges))]
 #copy over stats to annotations from read classes
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$NDR = equalRanges$NDR
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore = equalRanges$maxTxScore
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$readCount = equalRanges$readCount
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$relReadCount = equalRanges$relReadCount
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore = equalRanges$maxTxScore
-mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore.noFit = equalRanges$maxTxScore.noFit
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$NDR <- equalRanges$NDR
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore <- equalRanges$maxTxScore
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$readCount <- equalRanges$readCount
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$relReadCount <- equalRanges$relReadCount
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore <- equalRanges$maxTxScore
+mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore.noFit <- equalRanges$maxTxScore.noFit
 #mcols(annotationRangesToMerge[equalRanges$TXNAME])$relSubsetCount = equalRanges$relSubsetCount
 }
 if (length(extendedAnnotationRanges)) {
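The assignments above rely on a GRanges idiom worth spelling out: writing into mcols() of a named subset updates those elements in the parent object, and the target column is expected to exist already, which is presumably why the columns are initialised to NA a few lines earlier. A made-up miniature of the pattern:

# Hypothetical objects, not taken from the package.
library(GenomicRanges)
gr <- GRanges("chr1", IRanges(c(1, 10), width = 5), TXNAME = c("tx1", "tx2"))
names(gr) <- gr$TXNAME
mcols(gr)$NDR <- NA_real_        # column exists before the subset assignment
mcols(gr["tx2"])$NDR <- 0.05     # updates only the "tx2" element of gr itself
mcols(gr)$NDR                    # NA 0.05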
@@ -717,16 +718,16 @@ combineWithAnnotations <- function(rowDataCombinedFiltered,
 }else{
 extendedAnnotationRanges <- annotationRangesToMerge
 mcols(extendedAnnotationRanges)$txid <- seq_along(extendedAnnotationRanges)
-mcols(extendedAnnotationRanges)$relReadCount = NA
+mcols(extendedAnnotationRanges)$relReadCount <- NA
 #mcols(extendedAnnotationRanges)$relSubsetCount = NA
 }
 return(extendedAnnotationRanges)
 }
 
 #' calculate relative subset read count after filtering (increase speed, subsets are not considered here)'
 #' @noRd
-calculateRelSubsetCount = function(extendedAnnotationRanges, minEq, min.readFractionByEqClass){
-filter = !is.na(mcols(extendedAnnotationRanges)$readCount)
+calculateRelSubsetCount <- function(extendedAnnotationRanges, minEq, min.readFractionByEqClass){
+filter <- !is.na(mcols(extendedAnnotationRanges)$readCount)
 mcols(extendedAnnotationRanges)$relSubsetCount <- NA
 mcols(extendedAnnotationRanges)$relSubsetCount[filter] <-
 mcols(extendedAnnotationRanges)$readCount[filter]/
@@ -834,7 +835,7 @@ addGeneIdsToReadClassTable <- function(readClassTable, distTable,
 #' @details
 #' @return extendedAnnotations with a new NDR threshold
 #' @export
-setNDR = function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){
+setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){
 #Check to see if the annotations/gtf are dervived from Bambu
 if(is.null(mcols(extendedAnnotations)$NDR)){
 warning("Annotations were not extended by Bambu (or the wrong prefix was provided). NDR can not be set")
@@ -846,76 +847,79 @@ setNDR = function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix =
 
 #recommend an NDR (needed when users read in Bambu GTF)
 if(is.null(NDR)){
-tempAnno = c(extendedAnnotations, metadata(extendedAnnotations)$lowConfidenceTranscripts)
-NDR = recommendNDR.onAnnotations(tempAnno, prefix = prefix, baselineFDR = baselineFDR, defaultModels2 = defaultModels2)
+tempAnno <- c(extendedAnnotations, metadata(extendedAnnotations)$lowConfidenceTranscripts)
+NDR <- recommendNDR.onAnnotations(tempAnno, prefix = prefix, baselineFDR = baselineFDR, defaultModels2 = defaultModels2)
 message("Recommending a novel discovery rate (NDR) of: ", NDR)
 }
 
 #If reference annotations should be filtered too (note that reference annotations with no read support arn't filtered)
 if(includeRef){
-toRemove = (!is.na(mcols(extendedAnnotations)$NDR) & mcols(extendedAnnotations)$NDR > NDR)
-toAdd = !is.na(mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR) &
+toRemove <- (!is.na(mcols(extendedAnnotations)$NDR) & mcols(extendedAnnotations)$NDR > NDR)
+toAdd <- !is.na(mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR) &
 mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR
 } else {
-toRemove = (mcols(extendedAnnotations)$NDR > NDR &
+toRemove <- (mcols(extendedAnnotations)$NDR > NDR &
 grepl(prefix, mcols(extendedAnnotations)$TXNAME))
-toAdd = (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR &
+toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR &
 grepl(prefix, mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME))
 }
 
-temp = c(metadata(extendedAnnotations)$lowConfidenceTranscripts[!toAdd], extendedAnnotations[toRemove])
-extendedAnnotations = c(extendedAnnotations[!toRemove], metadata(extendedAnnotations)$lowConfidenceTranscripts[toAdd])
-metadata(extendedAnnotations)$lowConfidenceTranscripts = temp
+temp <- c(metadata(extendedAnnotations)$lowConfidenceTranscripts[!toAdd], extendedAnnotations[toRemove])
+extendedAnnotations <- c(extendedAnnotations[!toRemove], metadata(extendedAnnotations)$lowConfidenceTranscripts[toAdd])
+metadata(extendedAnnotations)$lowConfidenceTranscripts <- temp
 
 mcols(extendedAnnotations)$txid <- seq_along(extendedAnnotations)
 minEqClasses <- getMinimumEqClassByTx(extendedAnnotations)
 mcols(extendedAnnotations)$eqClassById <- minEqClasses$eqClassById
 
-metadata(extendedAnnotations)$NDRthreshold = NDR
+metadata(extendedAnnotations)$NDRthreshold <- NDR
 
 return(extendedAnnotations)
 }
 
+
+#' Extend annotations by clusters (work in progress?)
+#' @noRd
 isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE){
 message("--- Start extending annotations for clusters ---")
 #if clustering is a csv, create a list with the barcodes for each cluster
 #csv must have two cols with heading barcode, cluster
 if(!is.list(clusters)){
-clusters = read.csv(clusters)
-clusters = clusters %>% group_by(cluster) %>% summarise(barcodes = list(barcode))
-clusters = clusters$cluster
-clusters = clusters$barcodes
-names(clusters) = clusters
+clusters <- read.csv(clusters)
+clusters <- clusters %>% group_by(cluster) %>% summarise(barcodes = list(barcode))
+clusters <- clusters$cluster
+clusters <- clusters$barcodes
+names(clusters) <- clusters
 }
-annotations.clusters = list()
-rcfs.clusters = list()
-clusters.rc = splitReadClassFilesByRC(readClassList[[1]])
-txScores = c()
+annotations.clusters <- list()
+rcfs.clusters <- list()
+clusters.rc <- splitReadClassFilesByRC(readClassList[[1]])
+txScores <- c()
 for(i in seq_along(clusters)){
 print(names(clusters)[i])
 ###TODO need to account for the sample name here which is added to the barcode
-index = match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
-index = index[!is.na(index)]
+index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
+index <- index[!is.na(index)]
 print(length(index))
 if(length(index)<20) next
-rcf.counts = clusters.rc[,index]
-rcf.filt = readClassList[[1]][rowSums(rcf.counts)>0,]
-rowData(rcf.filt)$readCount = rowSums(rcf.counts)[rowSums(rcf.counts)>0]
-countsTBL = calculateGeneProportion(counts=mcols(rcf.filt)$readCount,
+rcf.counts <- clusters.rc[,index]
+rcf.filt <- readClassList[[1]][rowSums(rcf.counts)>0,]
+rowData(rcf.filt)$readCount <- rowSums(rcf.counts)[rowSums(rcf.counts)>0]
+countsTBL <- calculateGeneProportion(counts=mcols(rcf.filt)$readCount,
 geneIds=mcols(rcf.filt)$GENEID)
-rowData(rcf.filt)$geneReadProp = countsTBL$geneReadProp
-rowData(rcf.filt)$geneReadCount = countsTBL$geneReadCount
-rowData(rcf.filt)$startSD = 0
-rowData(rcf.filt)$endSD = 0
-rowData(rcf.filt)$readCount.posStrand = 0
-thresholdIndex = which(rowData(rcf.filt)$readCount>=isoreParameters$min.readCount)
-model = trainBambu(rcf.filt, verbose = verbose, min.readCount = isoreParameters$min.readCount)
-txScore = getTranscriptScore(rowData(rcf.filt)[thresholdIndex,], model,
+rowData(rcf.filt)$geneReadProp <- countsTBL$geneReadProp
+rowData(rcf.filt)$geneReadCount <- countsTBL$geneReadCount
+rowData(rcf.filt)$startSD <- 0
+rowData(rcf.filt)$endSD <- 0
+rowData(rcf.filt)$readCount.posStrand <- 0
+thresholdIndex <- which(rowData(rcf.filt)$readCount>=isoreParameters$min.readCount)
+model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = isoreParameters$min.readCount)
+txScore <- getTranscriptScore(rowData(rcf.filt)[thresholdIndex,], model,
 defaultModels)
-rowData(rcf.filt)$txScore = rep(NA,nrow(rcf.filt))
-rowData(rcf.filt)$txScore[thresholdIndex] = txScore
+rowData(rcf.filt)$txScore <- rep(NA,nrow(rcf.filt))
+rowData(rcf.filt)$txScore[thresholdIndex] <- txScore
 #txScores = cbind(txScores, rowData(rcf.filt)$txScore)
-rcfs.clusters[[names(clusters)[i]]] = rcf.filt
+rcfs.clusters[[names(clusters)[i]]] <- rcf.filt
 annotations.clusters[[names(clusters)[i]]] <- bambu.extendAnnotations(list(rcf.filt), annotations, NDR,
 isoreParameters, stranded, bpParameters, fusionMode, verbose)
 }

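Since setNDR() is the one exported function touched by this commit, a usage sketch may help. Everything below is hypothetical (the .rds path and the object name), and it assumes annotations that were extended by Bambu so that the NDR metadata column and the lowConfidenceTranscripts metadata are present:

# Hypothetical usage of setNDR(); requires Bambu-extended annotations.
extendedAnnotations <- readRDS("extended_annotations.rds")             # hypothetical file
strict  <- setNDR(extendedAnnotations, NDR = 0.1)                      # keep only novel transcripts with NDR <= 0.1
auto    <- setNDR(extendedAnnotations, NDR = NULL)                     # let recommendNDR.onAnnotations pick a threshold
withRef <- setNDR(extendedAnnotations, NDR = 0.2, includeRef = TRUE)   # also re-filter reference transcripts with read support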