Skip to content

Commit 5fa25de

Browse files
authored
Merge pull request #106 from GoekeLab/bambu-devel
Update documentations in master branch Former-commit-id: 91eae31
2 parents 8dbfafb + 36e1456 commit 5fa25de

File tree

76 files changed

+6543
-329
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+6543
-329
lines changed

.Rbuildignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
^\.Rproj\.user$
33
^packrat/
44
^\.Rprofile$
5+
^doc$
6+
^Meta$

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,5 @@ vignettes/*.pdf
4040
.Renviron
4141
packrat/lib*/
4242
.Rproj.user
43+
doc
44+
Meta

DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
Package: bambu
22
Type: Package
33
Title: Reference-guided isoform reconstruction and quantification for long read RNA-Seq data
4-
Version: 0.9.0
5-
Authors@R: c(person("Jonathan Goeke", "Developer", role = "aut",
6-
email = "[email protected]"),
7-
person("Ying Chen", "Developer", role = "cre",email = "chen_ying@gis.a-star.edu.sg"))
4+
Version: 0.1.0
5+
Authors@R: c(person("Ying Chen", "Developer", role = "cre",email = "[email protected]"),
6+
person("Jonathan Goeke", "Developer", role = "aut",
7+
email = "gokej@gis.a-star.edu.sg"))
88
Description: Multi-sample transcript discovery and quantification using long read RNA-Seq data.
99
License: GPL-3
1010
Encoding: UTF-8

R/abundance_quantification.R

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,37 @@
22
#' @title transcript_abundance_quantification
33
#' @param method A string variable indicating whether a one-step or two-step approach will be used. See \code{Details}
44
#' for details on one-step and two-step approach.
5-
#' @param read_classDT A \code{data.table} with columns
5+
#' @param readClassDt A \code{data.table} with columns
66
#' @importFrom BiocParallel bplapply
77
#' @noRd
8-
abundance_quantification <- function(read_classDT,ncore = 1,
9-
bias_correction = TRUE,
8+
abundance_quantification <- function(readClassDt,ncore = 1,
9+
bias = TRUE,
1010
maxiter = 20000,
11-
conv.control = 10^(-8)){
12-
gene_sidList <- unique(read_classDT$gene_sid)
13-
14-
bpParameters <- BiocParallel::bpparam()
15-
bpParameters$workers <- ncore
16-
11+
conv = 10^(-8)){
12+
gene_sidList <- unique(readClassDt$gene_sid)
13+
14+
if(ncore == 1){
15+
16+
emResultsList <- lapply(as.list(gene_sidList),
17+
run_parallel,
18+
conv = conv,
19+
bias = bias,
20+
maxiter = maxiter,
21+
readClassDt = readClassDt)
22+
}else{
23+
bpParameters <- BiocParallel::bpparam()
24+
bpParameters$workers <- ncore
25+
26+
emResultsList <- BiocParallel::bplapply(as.list(gene_sidList),
27+
run_parallel,
28+
conv = conv,
29+
bias = bias,
30+
maxiter = maxiter,
31+
readClassDt = readClassDt,
32+
BPPARAM=bpParameters)
33+
}
34+
1735

18-
emResultsList <- BiocParallel::bplapply(as.list(gene_sidList),
19-
run_parallel,
20-
conv.control = conv.control,
21-
bias_correction = bias_correction,
22-
maxiter = maxiter,
23-
read_classDT = read_classDT,
24-
BPPARAM=bpParameters)
2536

2637

2738
estimates <- list(do.call('rbind',lapply(1:length(emResultsList), function(x) emResultsList[[x]][[1]])),
@@ -30,8 +41,8 @@ abundance_quantification <- function(read_classDT,ncore = 1,
3041
return(estimates)
3142
}
3243

33-
run_parallel <- function(g,conv.control,bias_correction,maxiter, read_classDT){
34-
tmp <- read_classDT[gene_sid==g]
44+
run_parallel <- function(g,conv,bias,maxiter, readClassDt){
45+
tmp <- readClassDt[gene_sid==g]
3546
if((nrow(tmp)==1)){
3647
out <- list(data.table(tx_sid = tmp$tx_sid,
3748
estimates = tmp$nobs,
@@ -58,12 +69,12 @@ run_parallel <- function(g,conv.control,bias_correction,maxiter, read_classDT){
5869
est_output <- emWithL1(X = as.matrix(a_mat),
5970
Y = n.obs,
6071
lambda = lambda,
61-
d = bias_correction,
72+
d = bias,
6273
maxiter = maxiter,
63-
conv = conv.control)
74+
conv = conv)
6475
t_est <- as.numeric(t(est_output[["theta"]]))
6576

66-
if(bias_correction){
77+
if(bias){
6778
b_est <- as.numeric(t(est_output[["b"]]))
6879
}else{
6980
b_est <- rep(0,ncol(a_mat))

R/annotationFunctions.R

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -34,39 +34,52 @@ prepareAnnotations <- function(txdb) {
3434
}
3535

3636

37-
#' Prepare annotations from gtf
38-
#' @title prepare annotations from gtf file
39-
#' @param gtf.file A string variable indicates the path to a gtf file.
40-
#' @param organism as described in \code{\link{makeTxDbFromGFF}}.
41-
#' @param dataSource as described in \code{\link{makeTxDbFromGFF}}.
42-
#' @param taxonomyId as described in \code{\link{makeTxDbFromGFF}}.
43-
#' @param chrominfo as described in \code{\link{makeTxDbFromGFF}}.
44-
#' @param miRBaseBuild as described in \code{\link{makeTxDbFromGFF}}.
45-
#' @param metadata as described in \code{\link{makeTxDbFromGFF}}.
46-
#' @param dbxrefTag as described in \code{\link{makeTxDbFromGFF}}.
47-
#' @param ... see \code{\link{makeTxDbFromGFF}}.
48-
#' @return A \code{\link{GrangesList}} object
37+
#' Prepare annotation granges object from GTF file
38+
#' @title Prepare annotation granges object from GTF file into a GRangesList object
39+
#' @param file a GTF file
40+
#' @return grlist a \code{\link{GRangesList}} object, unlike \code{\link{readFromGTF}},
41+
#' this function finds out the equivalence classes between the transcripts,
42+
#' with \code{\link{mcols}} data having three columns:
43+
#' \itemize{
44+
#' \item TXNAME the transcript identifier, taken from the \code{transcript_id} attribute of the GTF file
45+
#' \item GENEID the gene identifier, taken from the \code{gene_id} attribute of the GTF file
46+
#' \item eqClass the minimum equivalence class of the transcript, as computed by \code{getMinimumEqClassByTx}
47+
#' }
48+
#'
4949
#' @export
50-
prepareAnnotationsFromGTF <- function(gtf.file, dataSource=NA,
51-
organism="Homo sapiens",
52-
taxonomyId=NA,
53-
chrominfo=NULL,
54-
miRBaseBuild=NA,
55-
metadata=NULL,
56-
dbxrefTag,...){
57-
return(prepareAnnotations(GenomicFeatures::makeTxDbFromGFF(gtf.file, format = "gtf",
58-
organism = organism,
59-
dataSource = dataSource,
60-
taxonomyId = taxonomyId,
61-
chrominfo = chrominfo,
62-
miRBaseBuild = miRBaseBuild,
63-
metadata = metadata,
64-
dbxrefTag = dbxrefTag
65-
)))
50+
prepareAnnotationsFromGTF <- function(file){
51+
if (missing(file)){
52+
stop('A GTF file is required.')
53+
}else{
54+
data <- read.delim(file,header=FALSE,comment.char='#')
55+
colnames(data) <- c("seqname","source","type","start","end","score","strand","frame","attribute")
56+
data <- data[data$type=='exon',]
57+
data$strand[data$strand=='.'] <- '*'
58+
data$GENEID = gsub('gene_id (.*?);.*','\\1',data$attribute)
59+
data$TXNAME=gsub('.*transcript_id (.*?);.*', '\\1',data$attribute)
60+
data$exon_rank=as.integer(gsub('.*exon_number (.*?);.*', '\\1',data$attribute))
61+
geneData=unique(data[,c('TXNAME', 'GENEID')])
62+
grlist <- makeGRangesListFromDataFrame(
63+
data[,c('seqname', 'start','end','strand','exon_rank','TXNAME')],split.field='TXNAME',keep.extra.columns = TRUE)
64+
65+
unlistedExons <- unlist(grlist, use.names = FALSE)
66+
partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)), names=NULL)
67+
txIdForReorder <- togroup(PartitioningByWidth(grlist))
68+
unlistedExons <- unlistedExons[order(txIdForReorder, unlistedExons$exon_rank)] #'exonsByTx' is always sorted by exon rank, not by strand, make sure that this is the case here
69+
unlistedExons$exon_endRank <- unlist(sapply(elementNROWS(grlist),seq,to=1), use.names=FALSE)
70+
unlistedExons <- unlistedExons[order(txIdForReorder, start(unlistedExons))]
71+
# mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
72+
grlist <- relist(unlistedExons, partitioning)
73+
minEqClasses <- getMinimumEqClassByTx(grlist)
74+
mcols(grlist) <- DataFrame(geneData[(match(names(grlist), geneData$TXNAME)),])
75+
mcols(grlist)$eqClass <- minEqClasses$eqClass[match(names(grlist),minEqClasses$queryTxId)]
76+
}
77+
return (grlist)
6678
}
6779

6880

6981

82+
7083
#' Get minimum equivalent class by Transcript
7184
#' @param exonsByTranscripts exonsByTranscripts
7285
#' @noRd

0 commit comments

Comments
 (0)