Skip to content

Commit c3a8f4d

Browse files
authored
Merge pull request #20 from GoekeLab/proActiv-dev
Updates to trim package size for Bioconductor submission
2 parents c4fee4e + 3f6a24f commit c3a8f4d

File tree

125 files changed

+8433
-23071
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+8433
-23071
lines changed

.Rbuildignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
^\.Rproj\.user$
33
^LICENSE\.md$
44
^README\.Rmd$
5+
^_pkgdown\.yml$
6+
^docs$
7+
^pkgdown$

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ vignettes/*.pdf
3838
# R Environment Variables
3939
.Renviron
4040

41-
# pkgdown site
42-
docs/
43-
4441
.Rproj.user
4542

4643
proActiv.Rproj

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ Suggests:
5353
DEXSeq,
5454
Rtsne,
5555
ggplot2,
56-
TxDb.Hsapiens.UCSC.hg38.knownGene,
57-
tidyr
56+
tidyr,
57+
vdiffr
5858
URL: https://github.com/GoekeLab/proActiv
5959
biocViews:
6060
RNASeq,

NAMESPACE

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,13 @@ export(PromoterAnnotation)
55
export(arrange)
66
export(as_tibble)
77
export(assays)
8-
export(calculateJunctionReadCounts)
9-
export(calculatePromoterReadCounts)
108
export(colData)
119
export(filter)
12-
export(getAbsolutePromoterActivity)
13-
export(getGeneExpression)
14-
export(getRelativePromoterActivity)
1510
export(group_by)
1611
export(loadDb)
1712
export(metadata)
1813
export(mutate)
1914
export(n)
20-
export(normalizePromoterReadCounts)
2115
export(plotPromoters)
2216
export(preparePromoterAnnotation)
2317
export(proActiv)

R/PromoterAnnotation-class.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ setClass(
4949
#'
5050
#' promoterAnnotation <- PromoterAnnotation()
5151
#' intronRanges(promoterAnnotation) <- intronRanges(
52-
#' promoterAnnotation.gencode.v19)
52+
#' promoterAnnotation.gencode.v34.subset)
5353
#' promoterIdMapping(promoterAnnotation) <- promoterIdMapping(
54-
#' promoterAnnotation.gencode.v19)
54+
#' promoterAnnotation.gencode.v34.subset)
5555
#' promoterCoordinates(promoterAnnotation) <- promoterCoordinates(
56-
#' promoterAnnotation.gencode.v19)
56+
#' promoterAnnotation.gencode.v34.subset)
5757
#'
5858

5959
PromoterAnnotation <-

R/annotation-data.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#' Prepare promoter annotation data for the user specified txdb object
1+
#' Prepares promoter annotation from a gtf or txdb
22
#'
33
#' @param txdb A txdb object. The txdb of the annotation version for which
44
#' promoters will be identified. Either `txdb` or `file` argument must be
@@ -18,7 +18,7 @@
1818
#' @examples
1919
#'
2020
#' txdbPath <- system.file('extdata/vignette/annotations/',
21-
#' 'gencode.v34.annotation.chr22.sqlite',
21+
#' 'gencode.v34.annotation.subset.sqlite',
2222
#' package = 'proActiv')
2323
#' txdb <- AnnotationDbi::loadDb(txdbPath)
2424
#' promoterAnnotation <- preparePromoterAnnotation(txdb = txdb,

R/data.R

Lines changed: 12 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,16 @@
1-
#' Promoter annotation data for Gencode.v19 including all the annotation objects
2-
#' required for promoter activity estimation
3-
#'
4-
#' A GRanges object containing the tss coordinate for each promoter for Gencode
5-
#' v19
6-
#'
7-
#' @format A PromoterAnnotation (S4 Class) object containing all the promoter
8-
#' annotation objects for Gencode.v19. The object has 3 slots: \describe{
9-
#' \item{intronRanges}{A GRanges object of 344,651 ranges corresponding
10-
#' to introns, annotated with the associated transcript.}
11-
#' \item{promoterIdMapping}{The id mapping between transcript names,
12-
#' promoter ids and gene ids for Gencode v19.}
13-
#' \item{promoterCoordinates}{A GRanges object of 113,076 ranges
14-
#' showing the tss coordinate for each promoter of Gencode v19,
15-
#' annotated with the associated gene id, coordinate of the 3' end of the first
16-
#' reduced exon, and intron id.} }
17-
#'
18-
"promoterAnnotation.gencode.v19"
19-
20-
#' Promoter annotation data for Gencode.v34 including all the annotation objects
21-
#' required for promoter activity estimation
22-
#'
23-
#' A GRanges object containing the tss coordinate for each promoter for Gencode
24-
#' v34
25-
#'
26-
#' @format A PromoterAnnotation (S4 Class) object containing all the promoter
27-
#' annotation objects for Gencode.v34. The object has 3 slots: \describe{
28-
#' \item{intronRanges}{A GRanges object of 383,654 ranges corresponding
1+
#' @title Promoter annotation for Gencode.v34 (subset)
2+
#' @description Promoter annotation for Gencode.v34
3+
#' (chr1:10,000,000 - 30,000,000)
4+
#' @format A PromoterAnnotation (S4 Class) object containing all promoter
5+
#' annotation objects for Gencode.v34 chr1:10,000,000-30,000,000.
6+
#' The object has 3 slots: \describe{
7+
#' \item{intronRanges}{A GRanges object of 4,523 ranges corresponding
298
#' to introns, annotated with the associated transcript.}
309
#' \item{promoterIdMapping}{The id mapping between transcript names,
3110
#' promoter ids and gene ids for Gencode v34.}
32-
#' \item{promoterCoordinates}{A GRanges object of 122,635 ranges
33-
#' showing the tss coordinate for each promoter of Gencode v34,
34-
#' annotated with the associated gene id, coordinate of the 3' end of the first
35-
#' reduced exon, and intron id.} }
36-
#'
37-
"promoterAnnotation.gencode.v34"
38-
39-
#' Promoter annotation data for Gencode.vM1 including all the annotation objects
40-
#' required for promoter activity estimation
41-
#'
42-
#' A GRanges object containing the tss coordinate for each promoter for Gencode
43-
#' vM1
44-
#'
45-
#' @format A PromoterAnnotation (S4 Class) object containing all the promoter
46-
#' annotation objects for Gencode.vM1. The object has 3 slots: \describe{
47-
#' \item{intronRanges}{A GRanges object of 243,332 ranges corresponding
48-
#' to introns, annotated with the associated transcript.}
49-
#' \item{promoterIdMapping}{The id mapping between transcript names,
50-
#' promoter ids and gene ids for Gencode vM1.}
51-
#' \item{promoterCoordinates}{A GRanges object of 60,768 ranges
52-
#' showing the tss coordinate for each promoter of Gencode vM1,
53-
#' annotated with the associated gene id, coordinate of the 3' end of the first
54-
#' reduced exon, and intron id.} }
55-
#'
56-
"promoterAnnotation.gencode.vM1"
57-
58-
#' Promoter annotation data for Gencode.vM25 including all the annotation objects
59-
#' required for promoter activity estimation
60-
#'
61-
#' A GRanges object containing the tss coordinate for each promoter for Gencode
62-
#' vM25
63-
#'
64-
#' @format A PromoterAnnotation (S4 Class) object containing all the promoter
65-
#' annotation objects for Gencode.vM25. The object has 3 slots: \describe{
66-
#' \item{intronRanges}{A GRanges object of 285,067 ranges corresponding
67-
#' to introns, annotated with the associated transcript.}
68-
#' \item{promoterIdMapping}{The id mapping between transcript names,
69-
#' promoter ids and gene ids for Gencode vM25.}
70-
#' \item{promoterCoordinates}{A GRanges object of 91,902 ranges
71-
#' showing the tss coordinate for each promoter of Gencode vM25,
72-
#' annotated with the associated gene id, coordinate of the 3' end of the first
73-
#' reduced exon, and intron id.} }
11+
#' \item{promoterCoordinates}{A GRanges object of 1,380 ranges
12+
#' showing the tss coordinate for each promoter of Gencode v34
13+
#' chr1:10,000,000-30,000,000, annotated with the associated gene id,
14+
#' coordinate of the 3' end of the first reduced exon, and intron id.} }
7415
#'
75-
"promoterAnnotation.gencode.vM25"
16+
"promoterAnnotation.gencode.v34.subset"

R/estimate-promoter-activity.R

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,6 @@
1010
#' @param pseudocount Number to be used for log2 as pseudocount if log2 is TRUE
1111
#'
1212
#' @return data.frame of absolute promoter activity with promoter and gene ids
13-
#' @export
14-
#'
15-
#' @examples
16-
#'
17-
#' ## junctionReadCounts is an object returned from normalizePromoterReadCounts
18-
#' junctionReadCounts <- readRDS(system.file('extdata/testdata/tophat2',
19-
#' 'normalizedPromoterCounts.rds',
20-
#' package = 'proActiv'))
21-
#' absolutePromoterActivity <- getAbsolutePromoterActivity(junctionReadCounts,
22-
#' promoterAnnotation.gencode.v19,
23-
#' log2 = TRUE,
24-
#' pseudocount = 1)
25-
#'
26-
#' @seealso \code{\link{preparePromoterAnnotation}} for preparing the mapping
27-
#' between promoters and genes, \code{\link{calculatePromoterReadCounts}} and
28-
#' \code{\link{normalizePromoterReadCounts}} for obtaining junction read
29-
#' counts
3013
#'
3114
getAbsolutePromoterActivity <- function(junctionReadCounts, promoterAnnotation,
3215
log2 = TRUE, pseudocount = 1) {
@@ -50,18 +33,7 @@ getAbsolutePromoterActivity <- function(junctionReadCounts, promoterAnnotation,
5033
#' @param absolutePromoterActivity data.frame of absolute promoter activity
5134
#' with promoter and gene ids
5235
#'
53-
#' @return data.frame of gene expression with gene ids
54-
#' @export
55-
#'
56-
#' @examples
57-
#'
58-
#' ## absolutePromoterActivity is an object returned
59-
#' ## from getAbsolutePromoterActivity
60-
#' absolutePromoterActivity <- readRDS(system.file('extdata/testdata/tophat2',
61-
#' 'absolutePromoterActivity.rds',
62-
#' package = 'proActiv'))
63-
#' geneExpression <- getGeneExpression(absolutePromoterActivity)
64-
#'
36+
#' @return data.frame of gene expression with gene ids
6537
#'
6638
getGeneExpression <- function(absolutePromoterActivity) {
6739
print('Calculating gene expression...')
@@ -88,22 +60,6 @@ getGeneExpression <- function(absolutePromoterActivity) {
8860
#' @param geneExpression data.frame of gene expression with gene ids
8961
#'
9062
#' @return data.frame of relative promoter activity with promoter and gene ids
91-
#' @export
92-
#'
93-
#' @examples
94-
#'
95-
#' ## absolutePromoterActivity is an object returned
96-
#' ## from getAbsolutePromoterActivity
97-
#' ## geneExpression is an object returned from getGeneExpression
98-
#' absolutePromoterActivity <- readRDS(system.file('extdata/testdata/tophat2',
99-
#' 'absolutePromoterActivity.rds',
100-
#' package = 'proActiv'))
101-
#' geneExpression <- readRDS(system.file('extdata/testdata/tophat2',
102-
#' 'geneExpression.rds',
103-
#' package = 'proActiv'))
104-
#' relativePromoterActivity <- getRelativePromoterActivity(
105-
#' absolutePromoterActivity,
106-
#' geneExpression)
10763
#'
10864
getRelativePromoterActivity <- function(absolutePromoterActivity,
10965
geneExpression) {

R/junction-read-count.R

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,8 @@
1010
#' 'star' or 'bam'
1111
#' @param genome character genome version
1212
#'
13-
#' @export
1413
#' @return The total number of junction reads overlapping with each promoter for
1514
#' the input annotated intron ranges
16-
#'
17-
#' @examples
18-
#'
19-
#' file <- list.files(system.file('extdata/testdata/tophat2',
20-
#' package = 'proActiv'),
21-
#' full.names = TRUE, pattern = 'sample1')
22-
#' promoterCoordinates <- promoterCoordinates(promoterAnnotation.gencode.v19)
23-
#' intronRanges <- intronRanges(promoterAnnotation.gencode.v19)
24-
#' junctionCounts <- calculateJunctionReadCounts(promoterCoordinates,
25-
#' intronRanges,
26-
#' file,
27-
#' fileType = 'tophat')
2815
#'
2916
#' @importFrom GenomeInfoDb seqlevelsStyle
3017
#' @importFrom S4Vectors queryHits
@@ -100,22 +87,8 @@ calculateJunctionReadCounts <- function(promoterCoordinates, intronRanges,
10087
#'
10188
#' @return A data.frame object. The number of junction reads per promoter (rows)
10289
#' for each sample (cols)
103-
#' @export
104-
#'
105-
#' @examples
106-
#'
107-
#' files <- list.files(system.file('extdata/testdata/tophat2',
108-
#' package = 'proActiv'),
109-
#' full.names = TRUE, pattern = 'sample')
110-
#' fileLabels <- c('sample1', 'sample2')
111-
#' promoterAnnotation <- promoterAnnotation.gencode.v19
112-
#' promoterReadCounts <- calculatePromoterReadCounts(promoterAnnotation,
113-
#' files,
114-
#' fileLabels,
115-
#' fileType = 'tophat',
116-
#' genome = NULL,
117-
#' numberOfCores = 1)
11890
#' @importFrom BiocParallel bpparam bplapply
91+
#'
11992
calculatePromoterReadCounts <- function(promoterAnnotation, files = NULL,
12093
fileLabels = NULL, fileType = NULL ,
12194
genome = NULL, numberOfCores = 1) {
@@ -160,16 +133,6 @@ calculatePromoterReadCounts <- function(promoterAnnotation, files = NULL,
160133
#' @return A data.frame object. The normalized number of junction reads per
161134
#' promoter (rows) for each sample (cols) using DESeq2 counts function.
162135
#' Requires 'DESeq2' package to be installed
163-
#' @export
164-
#'
165-
#' @examples
166-
#'
167-
#' ## promoterReadCounts is an object returned from calculatePromoterReadCounts
168-
#' promoterReadCounts <- readRDS(system.file('extdata/testdata/tophat2',
169-
#' 'promoterCounts.rds',
170-
#' package = 'proActiv'))
171-
#' normalizedPromoterReadCounts <- normalizePromoterReadCounts(
172-
#' promoterReadCounts)
173136
#'
174137
#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateSizeFactors counts
175138
normalizePromoterReadCounts <- function(promoterReadCounts) {

R/plot-proActiv.R

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
#' Wrapper function returning Summarized Experiment object giving promoter
2-
#' counts and activity
1+
#' Visualizes promoter activity and transcript model for a gene of interest
32
#'
43
#' @param result A SummarizedExperiment object with assays giving promoter
54
#' counts and activity with gene expression stored as column data and
@@ -39,17 +38,23 @@
3938
#'
4039
#' @examples
4140
#'
42-
#' gene <- 'ENSG00000076864.19'
43-
#' ## Genomic Ranges giving exons by transcripts of gene
44-
#' ranges <- readRDS(system.file('extdata/vignette/annotations',
45-
#' 'exonsBy.rap1gap.rds',
46-
#' package = 'proActiv'))
47-
#' ## summarizedExperiment returned by proActiv (subsetted to gene RAP1GAP)
48-
#' result <- readRDS(system.file('extdata/vignette/annotations',
49-
#' 'result.rap1gap.rds',
50-
#' package ='proActiv'))
51-
#' plotPromoters(result = result, gene = gene, ranges = ranges)
52-
#'
41+
#' ## First, run proActiv to generate a SummarizedExperiment result
42+
#' files <- list.files(system.file('extdata/vignette/junctions',
43+
#' package = 'proActiv'),
44+
#' full.names = TRUE)
45+
#' promoterAnnotation <- promoterAnnotation.gencode.v34.subset
46+
#' result <- proActiv(files = files,
47+
#' promoterAnnotation = promoterAnnotation,
48+
#' condition = rep(c('A549','HepG2'), each=3),
49+
#' ncores = 1)
50+
#' ## Load the pre-built TxDb annotation used to draw the transcript model
51+
#' txdb <- AnnotationDbi::loadDb(system.file('extdata/vignette/annotations',
52+
#' 'gencode.v34.annotation.rap1gap.sqlite',
53+
#' package = 'proActiv'))
54+
#' ## Declare a gene of interest
55+
#' gene <- 'ENSG00000076864.19'
56+
#' ## Call plot
57+
#' plotPromoters(result = result, gene = gene, txdb = txdb)
5358
#'
5459
#' @importFrom Gviz plotTracks GenomeAxisTrack
5560
#' @importFrom SummarizedExperiment rowData colData
@@ -61,16 +66,16 @@ plotPromoters <- function(result, gene, txdb, ranges,
6166
label.col = 'black', label.size = 0.7,
6267
arrow.width = NULL, arrow.fill = 'transparent',
6368
arrow.border = 'grey') {
64-
print(paste0('Plotting ', gene))
6569
result.gene <- result[rowData(result)$geneId == gene, ]
6670
rdata <- rowData(result.gene)[complete.cases(rowData(result.gene)),]
6771
groups <- unique(colData(result.gene)$condition)
6872

6973
if (nrow(rdata) == 0) {
70-
stop('Gene selected has only one transcript which is a single-exon
71-
transcript. proActiv does not estimate promoter activity in
72-
such cases.')
74+
stop('Gene ID selected is either not present or has only one transcript
75+
which is a single-exon transcript. proActiv does not estimate
76+
promoter activity in such cases.')
7377
}
78+
print(paste0('Plotting ', gene))
7479

7580
grtrack <- getGeneRegionTrack(rdata, gene, txdb, ranges)
7681
dtracklist <- getDataTrack(rdata, groups, blk.width = blk.width,

0 commit comments

Comments
 (0)