Skip to content

Commit 5e94585

Browse files
authored
Refactor data (#86)
* Update minMapQuality doc * Remove ref genome * remove need for chr5_distance object * Remove need for dummy_distance object * Remove need for dummy_distance object * Refactor chr5 MD objects to data folder * Add data files * Rename read_bam to distance * Try dynamic examples on mmappr * Document data * Try to fix tests * Update MmapprParam show method * BAM files in extdata * Refactor to use extdata BAMs * Add MMAPPR2data to mmappr and calculateDistance docs * Use intermediate MDs in docs * Install MMAPPR2data in Travis script * Use only dummy BAM in tests * Fix file validity * Update tests for new data * Index every file in BamFileList * Remove skipDebug * Redo MD objects * Update tests for new data * Remove old test BAM files, update README * Update docs to match MMAPPR2data change * Remove .addBamFileIndex from validity check * Delete extdata * Bump version * Update docs * Fix doc error * Change system() to system2() * Change back to system; system2 was failing * Remove index warning when building param * Try lazyload=yes * Travis: only devel * Try installing ExperimentHub from GitHub * Remove debug sys.which * Try using bioc-devel * Try bioc-devel again * Try bioc-devel again * Try another travis build, without sudo * Remove username from sample objects * Try possibleDates() again * Try GitHub BiocManager and sudo=required * Try ask=false * Add fake GmapGenomeDirectory to get examples to run * Update docs with examples * Add fake VEP for param example * Create directories for examples * See if outputMmapprData caused weird cleanEx dev.off problem * Reinstate outputMmapprData example, comment out prePeak example, which was causing weird failure * Update docs * Try comment out prePeak example again (fixed) * Uncomment prePeak, didn't make a difference * Don't run both outputMmapprData examples in both places * Update docs
1 parent 1a96756 commit 5e94585

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+390
-172
lines changed

.travis.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,17 @@ sudo: required
1111

1212
bioc_packages:
1313
- BiocCheck
14+
r_github_packages:
15+
- kjohnsen/MMAPPR2data
16+
- Bioconductor/BiocManager
1417

1518
before_script:
1619
# so NAMESPACE is up-to-date
1720
- R -e 'devtools::document(roclets=c("rd", "collate", "namespace"))'
21+
# ensure bioc-devel is being used
22+
- R -e 'BiocManager::install(version = "devel", ask=FALSE)'
23+
- R -e 'BiocManager::valid()'
24+
# run tests here (can't see all output from check)
1825
- R -e 'devtools::test()'
1926

2027
script:

DESCRIPTION

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: MMAPPR2
22
Title: Mutation Mapping Analysis Pipeline for Pooled RNA-Seq
3-
Version: 0.98.9
3+
Version: 0.98.10
44
Authors@R: c(
55
person("Kyle", "Johnsen", email="kjohnsen@byu.edu", role=c("aut")),
66
person('Nathaniel', 'Jenkins', role=c('aut')),
@@ -15,10 +15,12 @@ Depends: R (>= 3.5.0)
1515
License: GPL-3
1616
Encoding: UTF-8
1717
LazyData: true
18-
RoxygenNote: 6.0.1
18+
LazyLoad: yes
19+
RoxygenNote: 6.1.0
1920
Suggests: testthat,
2021
mockery,
21-
roxygen2
22+
roxygen2,
23+
MMAPPR2data
2224
Imports: ensemblVEP (>= 1.20.0),
2325
gmapR,
2426
Rsamtools,

R/aicc_loess.R

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
#' of the \code{distance} slot list filled.
1010
#'
1111
#' @examples
12-
#' \dontrun{
13-
#' md <- loessFit(md)
14-
#' }
12+
#' postLoessMD <- loessFit(postCalcDistMD)
1513
#' @export
1614
loessFit <- function(mmapprData) {
1715
loessOptResolution <- mmapprData@param@loessOptResolution

R/candidates.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#'
1515
#' @examples
1616
#' \dontrun{
17-
#' md <- generateCandidates(md)
17+
#' postCandidatesMD <- generateCandidates(postPeakRefMD)
1818
#' }
1919
generateCandidates <- function(mmapprData) {
2020

R/data.R

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#' MmapprData object after \code{\link{calculateDistance}} step.
2+
#'
3+
#' Produced originally from BAM files of the zy13 mutant
4+
#' and wild-type pools, aligned to the GRCz11 reference genome
5+
#' using HISAT2, as available in the \link[MMAPPR2data]{MMAPPR2data}
6+
#' package.
7+
#'
8+
#' @format A \code{\linkS4class{MmapprData}} object.
9+
#'
10+
#' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3613585/}
11+
"postCalcDistMD"
12+
13+
14+
#' MmapprData object after \code{\link{loessFit}} step.
15+
#'
16+
#' Produced originally from BAM files of the zy13 mutant
17+
#' and wild-type pools, aligned to the GRCz11 reference genome
18+
#' using HISAT2, as available in the \link[MMAPPR2data]{MMAPPR2data}
19+
#' package.
20+
#'
21+
#' @format A \code{\linkS4class{MmapprData}} object.
22+
#'
23+
#' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3613585/}
24+
"postLoessMD"
25+
26+
27+
#' MmapprData object after \code{\link{prePeak}} step.
28+
#'
29+
#' Produced originally from BAM files of the zy13 mutant
30+
#' and wild-type pools, aligned to the GRCz11 reference genome
31+
#' using HISAT2, as available in the \link[MMAPPR2data]{MMAPPR2data}
32+
#' package.
33+
#'
34+
#' @format A \code{\linkS4class{MmapprData}} object.
35+
#'
36+
#' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3613585/}
37+
"postPrePeakMD"
38+
39+
40+
#' MmapprData object after \code{\link{peakRefinement}} step.
41+
#'
42+
#' Produced originally from BAM files of the zy13 mutant
43+
#' and wild-type pools, aligned to the GRCz11 reference genome
44+
#' using HISAT2, as available in the \link[MMAPPR2data]{MMAPPR2data}
45+
#' package.
46+
#'
47+
#' @format A \code{\linkS4class{MmapprData}} object.
48+
#'
49+
#' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3613585/}
50+
"postPeakRefMD"
51+
52+
53+
#' MmapprData object after \code{\link{generateCandidates}} step.
54+
#'
55+
#' Produced originally from BAM files of the zy13 mutant
56+
#' and wild-type pools, aligned to the GRCz11 reference genome
57+
#' using HISAT2, as available in the \link[MMAPPR2data]{MMAPPR2data}
58+
#' package.
59+
#'
60+
#' @format A \code{\linkS4class{MmapprData}} object.
61+
#'
62+
#' @source \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3613585/}
63+
"postCandidatesMD"
64+
65+

R/read_bam.R renamed to R/distance.R

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Read BAM files and generate Euclidean distance data
22
#'
3-
#' Initalizes the MMAPPR2 pipeline and precedes the \code{\link{loessFit}}
3+
#' First step in the MMAPPR2 pipeline. Precedes the \code{\link{loessFit}}
44
#' step.
55
#'
66
#' @param mmapprData The \code{\linkS4class{MmapprData}} object to be analyzed.
@@ -10,14 +10,22 @@
1010
#' @export
1111
#'
1212
#' @examples
13-
#' \dontrun{
14-
#' md <- calculateDistance(md)
13+
#' if (requireNamespace('MMAPPR2data', quietly = TRUE)) {
14+
#' ## Ignore these lines:
15+
#' MMAPPR2:::.insertFakeVEPintoPath()
16+
#' genDir <- gmapR::GmapGenomeDirectory('example', create=TRUE)
17+
#'
18+
#' mmapprParam <- MmapprParam(refGenome = gmapR::GmapGenome("GRCz11", genDir),
19+
#' wtFiles = MMAPPR2data::zy13wtBam(),
20+
#' mutFiles = MMAPPR2data::zy13mutBam(),
21+
#' species = "danio_rerio")
22+
#'
23+
#' md <- new('MmapprData', param = mmapprParam)
24+
#' postCalcDistMD <- calculateDistance(md)
1525
#' }
1626
calculateDistance <- function(mmapprData) {
17-
if (is.na(Rsamtools::index(wtFiles(param(mmapprData)))))
18-
Rsamtools::indexBam(wtFiles(param(mmapprData)))
19-
if (is.na(Rsamtools::index(mutFiles(param(mmapprData)))))
20-
Rsamtools::indexBam(mutFiles(param(mmapprData)))
27+
.indexBamFileList(wtFiles(param(mmapprData)))
28+
.indexBamFileList(mutFiles(param(mmapprData)))
2129

2230
chrList <- suppressWarnings(.getFileReadChrList(mmapprData))
2331

@@ -28,6 +36,15 @@ calculateDistance <- function(mmapprData) {
2836
}
2937

3038

39+
# Ensure every BAM file in a list has an index, creating one where missing.
#
# bfl: a list-like collection of BAM file objects (a BamFileList in the
#      callers visible here); each element is passed to Rsamtools::index()
#      and, if needed, Rsamtools::indexBam().
#
# Returns NULL invisibly; called for its side effect of writing index files.
.indexBamFileList <- function(bfl) {
    for (i in seq_along(bfl)) {
        bamFile <- bfl[[i]]
        bamIndex <- Rsamtools::index(bamFile)
        # A missing index may be reported either as NA or as a zero-length
        # vector; a bare `if (is.na(...))` errors on the zero-length form,
        # so test the length first.
        if (length(bamIndex) == 0L || is.na(bamIndex[[1L]])) {
            Rsamtools::indexBam(bamFile)
        }
    }
    invisible(NULL)
}
46+
47+
3148
.calcDistForChr <- function(chrRange, param){
3249
startTime <- proc.time()
3350
tryCatch({
@@ -133,10 +150,12 @@ calculateDistance <- function(mmapprData) {
133150

134151

135152
.getFileReadChrList <- function(mmapprData) {suppressWarnings({
136-
wtFiles <- mmapprData@param@wtFiles
137-
mutFiles <- mmapprData@param@mutFiles
153+
bams <- Rsamtools::BamFileList(c(mmapprData@param@wtFiles,
154+
mmapprData@param@mutFiles))
138155

139-
chrRanges <- as(GenomeInfoDb::seqinfo(Rsamtools::BamFileList(c(wtFiles, mutFiles))), "GRanges")
156+
bamInfo <- Rsamtools::seqinfo(bams)
157+
Sys.which('hello-there')
158+
chrRanges <- as(bamInfo, "GRanges")
140159
#cut to standard chromosomes
141160
chrRanges <- GenomeInfoDb::keepStandardChromosomes(chrRanges, pruning.mode='coarse')
142161
chrRanges <- GenomeInfoDb::dropSeqlevels(chrRanges, 'chrM', pruning.mode='coarse')

R/main.R

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,34 @@
2020
#' desired parameters.
2121
#'
2222
#' @return A \code{\linkS4class{MmapprData}} object containing results
23-
#' as well as intermediate data.
23+
#' and/or intermediate data.
2424
#' @export
2525
#'
2626
#' @examples
27-
#' \dontrun{
28-
#' mmapprParam <- MmapprParam(refGenome = GmapGenome("GRCz11"),
29-
#' wtFiles = "wild_type.sorted.bam",
30-
#' mutFiles = "mutant.sorted.bam",
31-
#' species = "danio_rerio")
32-
#' mmapprData <- mmappr(mmapprParam)
27+
#' if (requireNamespace('MMAPPR2data', quietly = TRUE)) {
28+
#' ## Ignore these lines:
29+
#' MMAPPR2:::.insertFakeVEPintoPath()
30+
#' genDir <- gmapR::GmapGenomeDirectory('DEFAULT', create=TRUE)
3331
#'
34-
#' ### Alternately, you can navigate the pipeline step by step.
35-
#' ### This may be helpful for debugging.
36-
#' md <- new('MmapprData', param = mmapprParam)
37-
#' md <- calculateDistance(md)
38-
#' md <- loessFit(md)
39-
#' md <- prePeak(md)
40-
#' md <- peakRefinement(md)
41-
#' md <- generateCandidates(md)
42-
#' md <- outputMmapprData(md)
32+
#' # Specify parameters:
33+
#' mmapprParam <- MmapprParam(refGenome = gmapR::GmapGenome("GRCz11", genDir),
34+
#' wtFiles = MMAPPR2data::zy13wtBam(),
35+
#' mutFiles = MMAPPR2data::zy13mutBam(),
36+
#' species = "danio_rerio")
37+
#'
38+
#' # Run pipeline:
39+
#' mmapprData <- mmappr(mmapprParam)
40+
#'
41+
#' ### Alternately, you can navigate the pipeline step by step.
42+
#' ### This may be helpful for debugging.
43+
#' md <- new('MmapprData', param = mmapprParam)
44+
#' postCalcDistMD <- calculateDistance(md)
45+
#' postLoessMD <- loessFit(postCalcDistMD)
46+
#' postPrePeakMD <- prePeak(postLoessMD)
47+
#' postPeakRefMD <- peakRefinement(postPrePeakMD)
48+
#' \dontrun{postCandidatesMD <- generateCandidates(postPeakRefMD)
49+
#' outputMmapprData(postCandidatesMD)
50+
#' }
4351
#' }
4452
#'
4553
#' @seealso \code{\link{calculateDistance}}, \code{\link{loessFit}},
@@ -131,12 +139,10 @@ mmappr <- function(mmapprParam) {
131139
}
132140

133141

134-
.addBamFileIndex <- function(bf) {
135-
path <- bf$path
136-
index <- paste0(path, ".bai")
137-
if (!file.exists(index)){
138-
Rsamtools::indexBam(bf)
139-
}
140-
return(Rsamtools::BamFile(path, index = index))
141-
}
142-
142+
# Create an empty, executable file named `vep` and put its directory on PATH.
#
# Used by the package examples so that code looking for a `vep` executable
# can run on machines where VEP is not installed.
#
# binDir: directory that will hold the fake executable. It is deleted and
#         recreated on every call. Defaults to '/tmp/bin'.
#
# Returns TRUE invisibly. Side effects: recreates `binDir`, writes
# `binDir/vep`, and modifies the PATH environment variable of this R session.
.insertFakeVEPintoPath <- function(binDir = '/tmp/bin') {
    fakeVep <- file.path(binDir, 'vep')

    # Start from a clean directory so stale files never linger.
    unlink(binDir, recursive = TRUE)
    dir.create(binDir, recursive = TRUE, showWarnings = FALSE)
    file.create(fakeVep)
    # Same permissions as `chmod 777`, without spawning a shell.
    Sys.chmod(fakeVep, mode = '0777', use_umask = FALSE)

    # Append to PATH only once, using the platform's separator, so repeated
    # calls (one per example) do not keep growing PATH.
    pathEntries <- strsplit(Sys.getenv('PATH'), .Platform$path.sep,
                            fixed = TRUE)[[1L]]
    if (!(binDir %in% pathEntries)) {
        Sys.setenv(PATH = paste(c(pathEntries, binDir),
                                collapse = .Platform$path.sep))
    }
    invisible(TRUE)
}

R/output.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
#' @export
99
#'
1010
#' @examples
11-
#' \dontrun{
12-
#' outputMmapprData(md)
13-
#' }
11+
#' dir.create(outputFolder(param(postCandidatesMD))) ## Ignore this line
12+
#' \dontrun{outputMmapprData(postCandidatesMD)}
1413
outputMmapprData <- function(mmapprData) {
1514
stopifnot(class(mmapprData) == "MmapprData")
1615
oF <- outputFolder(param(mmapprData))

R/peaks.R

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@
1010
#' @export
1111
#'
1212
#' @examples
13-
#' \dontrun{
14-
#' md <- peakRefinement(md)
15-
#' }
13+
#' postPeakRefMD <- peakRefinement(postPrePeakMD)
1614
peakRefinement <- function(mmapprData){
1715
mmapprData@peaks <-
1816
.runFunctionInParallel(mmapprData@peaks,
@@ -102,9 +100,7 @@ peakRefinement <- function(mmapprData){
102100
#' @export
103101
#'
104102
#' @examples
105-
#' \dontrun{
106-
#' md <- prePeak(md)
107-
#' }
103+
#' postPrePeakMD <- prePeak(postLoessMD)
108104
prePeak <- function(mmapprData) {
109105
mmapprData@peaks <- list()
110106

R/s4_methods.R

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,15 @@
7373
#' @export
7474
#'
7575
#' @examples
76-
#' \dontrun{
77-
#' mp <- MmapprParam(gmapR::GmapGenome('GRCz11'), 'wt.bam', 'mut.bam', 'danio_rerio')
76+
#' if (requireNamespace('MMAPPR2data', quietly=TRUE)) {
77+
#' ## Ignore these lines:
78+
#' MMAPPR2:::.insertFakeVEPintoPath()
79+
#' genDir <- gmapR::GmapGenomeDirectory('example', create=TRUE)
80+
#'
81+
#' mmapprParam <- MmapprParam(refGenome = gmapR::GmapGenome("GRCz11", genDir),
82+
#' wtFiles = MMAPPR2data::zy13wtBam(),
83+
#' mutFiles = MMAPPR2data::zy13mutBam(),
84+
#' species = "danio_rerio")
7885
#' }
7986
MmapprParam <- function(refGenome, wtFiles, mutFiles, species, vepFlags=NULL,
8087
outputFolder=NULL, distancePower=4,
@@ -145,13 +152,7 @@ MmapprParam <- function(refGenome, wtFiles, mutFiles, species, vepFlags=NULL,
145152
errors <- c(errors, paste0(files, " is not a BamFileList object"))
146153
for (i in seq_along(files)) {
147154
file <- files[[i]]
148-
if (file.exists(file$path)) {
149-
if (length(Rsamtools::index(file)) == 0) {
150-
file <- .addBamFileIndex(file)
151-
if (length(Rsamtools::index(file)) == 0)
152-
warning(paste0(file$path), " in wtFiles has no index file")
153-
}
154-
} else {
155+
if (!file.exists(file$path)) {
155156
errors <- c(errors, paste0(file$path, " does not exist"))
156157
}
157158
}
@@ -182,7 +183,8 @@ setMethod("show", "MmapprParam", function(object) {
182183
.customPrint(object@vepFlags, margin)
183184

184185
cat("Other parameters:\n")
185-
slotNames <- slotNames("MmapprParam")[5:length(slotNames("MmapprParam"))]
186+
slotNames <- slotNames("MmapprParam")[6:length(slotNames("MmapprParam"))]
187+
slotNames <- c('species', slotNames)
186188
slotValues <- sapply(slotNames, function(name) slot(object, name))
187189
names(slotValues) <- slotNames
188190
print(slotValues, quote=FALSE)
@@ -255,6 +257,7 @@ setMethod("show", "MmapprData", function(object) {
255257
#' peakIntervalWidth peakIntervalWidth<-
256258
#' minDepth minDepth<-
257259
#' minBaseQuality minBaseQuality<-
260+
#' minMapQuality minMapQuality<-
258261
#' loessOptResolution loessOptResolution<-
259262
#' loessOptCutFactor loessOptCutFactor<-
260263
#' naCutoff naCutoff<-
@@ -356,15 +359,15 @@ setMethod("wtFiles<-", "MmapprParam",
356359
function(obj, value) {
357360
obj@wtFiles <- Rsamtools::BamFileList(value)
358361
v <- .validFiles(obj@wtFiles)
359-
if (typeof(v) == 'logical') obj else v
362+
if (typeof(v) == 'logical') obj else stop(v)
360363
})
361364
#' @rdname MmapprParam-functions
362365
#' @export
363366
setMethod("mutFiles<-", "MmapprParam",
          function(obj, value) {
              # Coerce whatever was supplied into a BamFileList before
              # storing it in the slot.
              obj@mutFiles <- Rsamtools::BamFileList(value)
              # Validate the slot that was just assigned (mutFiles, not
              # wtFiles) so that bad mutant files are actually reported.
              v <- .validFiles(obj@mutFiles)
              # A logical result is treated as success; anything else is
              # passed to stop() as the error message.
              if (is.logical(v)) obj else stop(v)
          })
369372
#' @rdname MmapprParam-functions
370373
#' @export

0 commit comments

Comments
 (0)