@@ -17,7 +17,7 @@ isore.combineTranscriptCandidates <- function(readClassList,
1717 combinedSplicedTranscripts <-
1818 combineSplicedTranscriptModels(readClassList , bpParameters ,
1919 min.readCount , min.readFractionByGene ,
20- min.txScore.multiExon , min.txScore.singleExon , verbose ) % > % data.table()
20+ min.txScore.multiExon , min.txScore.singleExon , verbose )
2121 combinedSplicedTranscripts [,confidenceType : = " highConfidenceJunctionReads" ]
2222 # when single exon min score is greater than 1, skip unspliced transcripts combination
2323 # this is a very customized config, useful when data is very big
@@ -92,40 +92,34 @@ sequentialCombineFeatureTibble <- function(readClassList,
9292
#' Summarise per-sample start, end and read count columns row by row
#'
#' Every row is reduced to one start and one end (each obtained from
#' readCountWeightedMedian() over the columns whose names begin with "start"
#' or "end", weighted by the columns whose names begin with "readCount") and
#' to the NA-ignoring sum of its read count columns. These three values are
#' then joined back to the fixed set of annotation columns selected below.
#'
#' @param combinedFeatureTibble table holding the per-sample start, end and
#'     readCount columns together with the annotation columns; it is handed
#'     to setDT(), which converts it to a data.table by reference
#' @return data.table with start, end, readCount and the selected annotation
#'     columns; the temporary rowID helper column is removed before returning
#' @noRd
updateStartEndReadCount <- function(combinedFeatureTibble){
    setDT(combinedFeatureTibble)
    # temporary row identifier used for grouping and for the join below
    combinedFeatureTibble[, rowID := .I]

    # each family of columns is sorted by name so that the three sets line
    # up sample by sample
    allNames <- colnames(combinedFeatureTibble)
    countVars <- sort(grep("^readCount", allNames, value = TRUE))
    startVars <- sort(grep("^start", allNames, value = TRUE))
    endVars <- sort(grep("^end", allNames, value = TRUE))

    # sCols, cCols and eCols are placeholders that `env` replaces with the
    # column name vectors before j is evaluated
    perRowSummary <- combinedFeatureTibble[,
        .(start = readCountWeightedMedian(.SD, sCols, cCols),
          end = readCountWeightedMedian(.SD, eCols, cCols),
          readCount = sum(.SD[, cCols], na.rm = TRUE)),
        by = rowID,
        env = I(list(sCols = startVars, cCols = countVars, eCols = endVars))]

    annotationDt <- combinedFeatureTibble[, .(intronStarts, intronEnds, chr,
        strand, maxTxScore, maxTxScore.noFit, NSampleReadCount,
        NSampleReadProp, NSampleTxScore, rowID)]

    combinedFeatureTibble <- perRowSummary[annotationDt, on = "rowID"]
    combinedFeatureTibble[, rowID := NULL]
    return(combinedFeatureTibble)
}
128115
#' Read-count-weighted median of a set of columns, without interpolation
#'
#' Every entry of the `valuevar` columns is weighted by the matching entry of
#' the `timesvar` columns (truncated to whole counts). The result is the
#' value found at position floor(N / 2) + 1 once each value is repeated
#' "weight" times and sorted, N being the total weight. It is therefore
#' always one of the observed values. The repeated vector is never built;
#' the position is located on the cumulative weights instead, so memory use
#' does not grow with the read counts.
#'
#' @param dt data.table or data.frame holding the columns
#' @param valuevar names (or positions) of the value columns
#' @param timesvar names (or positions) of the weight columns, given in the
#'     same order as `valuevar`
#' @return a single observed value, or an NA of the values' type when no
#'     value has a usable (non-missing, positive) weight
#' @noRd
readCountWeightedMedian <- function(dt, valuevar, timesvar){
    columns <- as.list(dt)
    values <- unlist(columns[valuevar], use.names = FALSE)
    # kept as double so that the cumulative sum cannot overflow integer range
    weights <- trunc(as.numeric(unlist(columns[timesvar], use.names = FALSE)))
    if (length(values) != length(weights)) {
        stop("'valuevar' and 'timesvar' must select the same number of entries",
            call. = FALSE)
    }

    # drop a value together with its weight so the remaining pairs stay
    # aligned even when only one side of a pair is missing
    keep <- !is.na(values) & !is.na(weights) & weights > 0
    if (!any(keep)) {
        return(values[NA_integer_])
    }
    values <- values[keep]
    weights <- weights[keep]

    ord <- order(values)
    cumWeight <- cumsum(weights[ord])
    # first sorted value whose cumulative weight exceeds half of the total,
    # i.e. the element at position floor(N / 2) + 1 of the expanded vector
    values[ord][which(cumWeight > sum(weights) / 2)[1L]]
}
129123
130124
131125# ' Function to combine featureTibble and create the NSample variables
0 commit comments