Skip to content

Commit c5e923d

Browse files
authored
Merge branch 'Multiplex_Major_Patch' into devel
2 parents 1463b08 + 9572dc0 commit c5e923d

File tree

49 files changed

+2943
-515
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+2943
-515
lines changed

.github/workflows/check-bioc.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ env:
4040
run_covr: 'false'
4141
run_pkgdown: 'false'
4242
has_RUnit: 'false'
43-
cache-version: 'cache-v3'
43+
cache-version: 'cache-v4'
4444
run_docker: 'false'
4545

4646
jobs:
@@ -56,7 +56,7 @@ jobs:
5656
config:
5757
- { os: ubuntu-latest, r: '4.4.2', bioc: '3.20', cont: "bioconductor/bioconductor_docker:RELEASE_3_20", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
5858
- { os: macOS-latest, r: '4.4.2', bioc: '3.20'}
59-
##- { os: windows-latest, r: '4.3', bioc: '3.18'}
59+
## - { os: windows-latest, r: '4.4', bioc: '3.20'}
6060
## Check https://github.com/r-lib/actions/tree/master/examples
6161
## for examples using the http-user-agent
6262
env:
@@ -81,7 +81,7 @@ jobs:
8181
## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml
8282
## If they update their steps, we will also need to update ours.
8383
- name: Checkout Repository
84-
uses: actions/checkout@v3
84+
uses: actions/checkout@v4
8585

8686
## R is already included in the Bioconductor docker images
8787
- name: Setup R from r-lib
@@ -104,15 +104,15 @@ jobs:
104104

105105
- name: Restore R package cache
106106
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'"
107-
uses: actions/cache@v3
107+
uses: actions/cache@v4
108108
with:
109109
path: ${{ env.R_LIBS_USER }}
110110
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}
111111
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-
112112

113113
- name: Cache R packages on Linux
114114
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' "
115-
uses: actions/cache@v3
115+
uses: actions/cache@v4
116116
with:
117117
path: /home/runner/work/_temp/Library
118118
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
env:
1616
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
1717
steps:
18-
- uses: actions/checkout@v2
18+
- uses: actions/checkout@v4
1919

2020
- uses: r-lib/actions/setup-r@v2
2121
with:

.github/workflows/pr-commands.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
env:
1515
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
1616
steps:
17-
- uses: actions/checkout@v2
17+
- uses: actions/checkout@v4
1818

1919
- uses: r-lib/actions/pr-fetch@v2
2020
with:
@@ -49,7 +49,7 @@ jobs:
4949
env:
5050
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
5151
steps:
52-
- uses: actions/checkout@v2
52+
- uses: actions/checkout@v4
5353

5454
- uses: r-lib/actions/pr-fetch@v2
5555
with:

.github/workflows/test-coverage.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
1717

1818
steps:
19-
- uses: actions/checkout@v3
19+
- uses: actions/checkout@v4
2020

2121
- uses: r-lib/actions/setup-r@v2
2222
with:

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ Imports:
8686
Rsamtools,
8787
methods,
8888
Rcpp,
89-
xgboost
89+
xgboost,
90+
Matrix
9091
VignetteBuilder:
9192
knitr
9293
LazyData: true

NAMESPACE

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ export(readFromGTF)
77
export(transcriptToGeneExpression)
88
export(writeBambuOutput)
99
export(writeToGTF)
10+
export(writeAnnotationsToGTF)
1011
export(trainBambu)
12+
export(setNDR)
1113
export(compareTranscripts)
1214
importFrom(stats,predict)
1315
importFrom(BiocGenerics,basename)
@@ -73,7 +75,8 @@ import(data.table, except=c(last, first, shift, second, between))
7375
import(dplyr, except=c(last, first, desc, union, setdiff, intersect, slice))
7476
import(IRanges, except=c(slice, collapse, setdiff, intersect,cor))
7577
import(SummarizedExperiment)
76-
import(S4Vectors, except=c(rename, setequal, setdiff, intersect,cor))
78+
import(Matrix)
79+
import(S4Vectors, except=c(rename, setequal, setdiff, intersect,cor, unname, expand))
7780
useDynLib(bambu, .registration = TRUE)
7881
import(xgboost)
7982
import(BSgenome)

R/bambu-assignDist.R

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#' Create equivalence classes and assign them to transcripts
#'
#' Attaches the distance table returned by `calculateDistTable()` to the read
#' class object, splits it with `splitReadClassFiles()`, builds the read class
#' table with `genEquiRCs()` and returns a SummarizedExperiment holding the
#' unique (non-EM) counts, with the intermediate tables stored in its metadata.
#' @inheritParams bambu
#' @import data.table
#' @noRd
assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters,
                                verbose, demultiplexed, spatial,
                                returnDistTable = FALSE, trackReads = TRUE) {
    # a character input is read from disk with readRDS()
    if (is.character(readClassList)) {
        readClassList <- readRDS(file = readClassList)
    }
    # attach the distance table first, then split the read class object
    metadata(readClassList)$readClassDist <- calculateDistTable(
        readClassList, annotations, isoreParameters, verbose, returnDistTable)
    readClassList <- splitReadClassFiles(readClassList)
    # readClassList is not modified below, so its metadata can be looked up once
    rcMeta <- metadata(readClassList)

    eqClassDt <- genEquiRCs(rcMeta$readClassDist, annotations, verbose)
    eqClassDt$eqClass.match <- match(eqClassDt$eqClassById, rcMeta$eqClassById)
    eqClassDt <- simplifyNames(eqClassDt)
    # flag (eqClassId, gene_sid) groups that contain more than one txid
    eqClassDt <- eqClassDt %>%
        group_by(eqClassId, gene_sid) %>%
        mutate(multi_align = length(unique(txid))>1) %>%
        ungroup() %>%
        mutate(aval = 1) %>%
        data.table()

    # return non-em counts
    sampleColData <- generateColData(colnames(rcMeta$countMatrix),
        clusters = NULL, demultiplexed, spatial)
    uniqueCountMat <- generateUniqueCounts(eqClassDt, rcMeta$countMatrix,
        annotations)
    se <- SummarizedExperiment(
        assays = SimpleList(counts = uniqueCountMat),
        rowRanges = annotations,
        colData = sampleColData)
    colnames(se) <- sampleColData$id

    # incompatible counts are only generated when the matrix is not all zero
    if (sum(rcMeta$incompatibleCountMatrix) == 0) {
        metadata(se)$incompatibleCounts <- NULL
    } else {
        metadata(se)$incompatibleCounts <- generateIncompatibleCounts(
            rcMeta$incompatibleCountMatrix, annotations)
    }
    metadata(se)$nonuniqueCounts <- generateNonUniqueCounts(eqClassDt,
        rcMeta$countMatrix, annotations)
    metadata(se)$readClassDt <- eqClassDt
    metadata(se)$countMatrix <- rcMeta$countMatrix
    metadata(se)$incompatibleCountMatrix <- rcMeta$incompatibleCountMatrix
    metadata(se)$sampleNames <- rcMeta$sampleNames

    if (returnDistTable) {
        metadata(se)$distTable <- metadata(rcMeta$readClassDist)$distTableOld
    }
    if (trackReads) {
        metadata(se)$readToTranscriptMap <- generateReadToTranscriptMap(
            readClassList, rcMeta$readClassDist, annotations)
    }
    return(se)
}
48+
49+
#' Generate unique counts
#'
#' Sums, per `txid`, the rows of `countMatrix` that belong to read classes not
#' flagged `multi_align`, and places the sums in a sparse matrix with one row
#' per entry of `annotations` (rows without counts stay zero).
#' @param readClassDt table with the columns `multi_align`, `eqClass.match`
#'     (row index into `countMatrix`) and `txid`
#' @param countMatrix count matrix whose rows are addressed by `eqClass.match`
#' @param annotations annotation ranges; `names(annotations)` and
#'     `mcols(annotations)$txid` are used to label the output rows
#' @return sparse matrix with `length(annotations)` rows and
#'     `ncol(countMatrix)` columns, row names set to `names(annotations)`
#' @noRd
generateUniqueCounts <- function(readClassDt, countMatrix, annotations){
    x <- readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
    counts <- sparseMatrix(length(annotations), ncol(countMatrix), x = 0)
    rownames(counts) <- names(annotations)
    # nothing aligns uniquely: every transcript keeps a zero count
    if (nrow(x) == 0) return(counts)
    # drop = FALSE keeps a one-row selection as a matrix, as done in
    # generateNonUniqueCounts
    uniqueCounts <- countMatrix[x$eqClass.match, , drop = FALSE]
    # read class x transcript indicator matrix. Built directly instead of via
    # sparse.model.matrix(~ factor - 1), which cannot encode a factor with a
    # single level (the case generateNonUniqueCounts special-cases)
    txFactor <- factor(x$txid)
    uniqueCounts.tx <- sparseMatrix(i = seq_along(txFactor),
        j = as.integer(txFactor), x = 1,
        dims = c(length(txFactor), nlevels(txFactor)))
    uniqueCounts <- t(uniqueCounts.tx) %*% uniqueCounts
    rownames(uniqueCounts) <- names(annotations)[match(
        as.numeric(levels(txFactor)), mcols(annotations)$txid)]
    counts[rownames(uniqueCounts),] <- uniqueCounts
    return(counts)
}
68+
69+
70+
#' Generate incompatible counts
#'
#' Expands `incompatibleCountMatrix` to a sparse matrix with one row per
#' distinct `GENEID` found in `annotations`.
#' @param incompatibleCountMatrix count matrix whose row names are numeric
#'     positions into the gene levels derived from `annotations`
#' @param annotations annotation ranges providing `mcols(annotations)$GENEID`
#' @return sparse matrix with one row per gene level, row names set to the
#'     gene ids
#' @noRd
generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){
    geneLevels <- levels(factor(unique(mcols(annotations)$GENEID)))
    # translate the numeric row names into the matching gene ids
    geneIndex <- as.numeric(rownames(incompatibleCountMatrix))
    rownames(incompatibleCountMatrix) <- geneLevels[geneIndex]
    # zero matrix covering every gene, filled in for the genes with counts
    allGeneCounts <- sparseMatrix(length(geneLevels),
        ncol(incompatibleCountMatrix), x = 0)
    rownames(allGeneCounts) <- geneLevels
    allGeneCounts[rownames(incompatibleCountMatrix),] <- incompatibleCountMatrix
    allGeneCounts
}
80+
81+
82+
#' Generate non-unique counts
#'
#' Sums, per `gene_sid`, the rows of `countMatrix` that belong to read classes
#' flagged `multi_align` (one row per `eqClassId`), and places the sums in a
#' sparse matrix with one row per distinct `GENEID` found in `annotations`.
#' @param readClassDt table with the columns `multi_align`, `eqClass.match`
#'     (row index into `countMatrix`), `eqClassId`, `gene_sid` and `txid`
#' @param countMatrix count matrix whose rows are addressed by `eqClass.match`
#' @param annotations annotation ranges providing `mcols(annotations)$GENEID`
#' @return sparse matrix with one row per gene level, row names set to the
#'     gene ids
#' @noRd
generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){
    #fuse multi align RCs by gene
    x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match))
    x <- x %>% distinct(eqClassId, .keep_all = TRUE)
    nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE]
    # both operands are scalars, so use the short-circuiting && in the condition
    if(nrow(x)>1 && length(unique(x$gene_sid))>1){
        nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1)
        nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
    } else{
        # with at most one gene level all rows are summed into a single row
        warning("The factor variable 'gene_sid' has only one level. Adjusting output.")
        nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
        nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
    }
    #convert ids into gene ids
    geneids <- as.numeric(levels(factor(x$gene_sid)))
    geneids <- x$txid[match(geneids, x$gene_sid)]
    # NOTE(review): txid is used as a position into annotations here, while
    # generateUniqueCounts matches against mcols(annotations)$txid - confirm
    # that txid always equals the row index
    geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
    rownames(nonuniqueCounts) <- geneids
    #create matrix for all annotated genes
    genes <- levels(factor(unique(mcols(annotations)$GENEID)))
    geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
    rownames(geneMat) <- genes
    # skip the fill when no row names could be assigned
    if(!is.null(rownames(nonuniqueCounts))){
        geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts
    }
    return(geneMat)
}

R/bambu-extendAnnotations-utilityCombine.R

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ isore.combineTranscriptCandidates <- function(readClassList,
1919
min.readCount, min.readFractionByGene,
2020
min.txScore.multiExon, min.txScore.singleExon, verbose)
2121
combinedSplicedTranscripts[,confidenceType := "highConfidenceJunctionReads"]
22+
# when single exon min score is greater than 1, skip unspliced transcripts combination
23+
# this is a very customized config, useful when data is very big
24+
if (min.txScore.singleExon > 1)
25+
return(combinedSplicedTranscripts)
2226
combinedUnsplicedTranscripts <-
2327
combineUnsplicedTranscriptModels(readClassList, bpParameters,
2428
stranded, min.readCount, min.readFractionByGene,
@@ -35,11 +39,11 @@ isore.combineTranscriptCandidates <- function(readClassList,
3539
combineSplicedTranscriptModels <- function(readClassList, bpParameters,
3640
min.readCount, min.readFractionByGene, min.txScore.multiExon,
3741
min.txScore.singleExon, verbose){
38-
bpParameters$progressbar = FALSE
42+
bpParameters$progressbar <- FALSE
3943
options(scipen = 999) #maintain numeric basepair locations not sci.notfi.
4044
start.ptm <- proc.time()
4145
n_sample <- length(readClassList)
42-
nGroups = max(ceiling(n_sample/10),min(bpworkers(bpParameters),
46+
nGroups <- max(ceiling(n_sample/10),min(bpworkers(bpParameters),
4347
round(n_sample/2)))
4448
indexList <- sample(rep(seq_len(nGroups), length.out=n_sample))
4549
indexList <- splitAsList(seq_len(n_sample), indexList)
@@ -128,7 +132,7 @@ combineFeatureTibble <- function(combinedFeatureTibble,
128132
maxTxScore.noFit, NSampleReadCount, NSampleReadProp,NSampleTxScore,
129133
starts_with('start'), starts_with('end'), starts_with('readCount'))
130134
} else {
131-
combinedTable = full_join(combinedFeatureTibble,
135+
combinedTable <- full_join(combinedFeatureTibble,
132136
featureTibbleSummarised, by = c('intronStarts', 'intronEnds', 'chr',
133137
'strand'), suffix=c('.combined','.new')) %>%
134138
mutate(NSampleReadCount=pmax0NA(NSampleReadCount.combined) +
@@ -208,7 +212,7 @@ combineUnsplicedTranscriptModels <-
208212
min.readFractionByGene, min.txScore.multiExon,
209213
min.txScore.singleExon, verbose){
210214
start.ptm <- proc.time()
211-
bpParameters$progressbar = FALSE
215+
bpParameters$progressbar <- FALSE
212216
newUnsplicedSeList <-
213217
bplapply(seq_along(readClassList), function(sample_id)
214218
extractNewUnsplicedRanges(readClassSe =
@@ -285,7 +289,7 @@ reduceUnsplicedRanges <- function(rangesList, stranded){
285289
makeUnsplicedTibble <- function(combinedNewUnsplicedSe,newUnsplicedSeList,
286290
colDataNames,min.readCount, min.readFractionByGene,
287291
min.txScore.multiExon, min.txScore.singleExon, bpParameters){
288-
bpParameters$progressbar = FALSE
292+
bpParameters$progressbar <- FALSE
289293
newUnsplicedTibble <- as_tibble(combinedNewUnsplicedSe) %>%
290294
rename(chr = seqnames) %>% select(chr, start, end, strand, row_id) %>%
291295
separate_rows(row_id, sep = "\\+")

0 commit comments

Comments
 (0)