Skip to content

Commit f4423b9

Browse files
committed
removePrimers bug fixes and version bump to 1.12.1
1 parent b298181 commit f4423b9

File tree

4 files changed

+103
-86
lines changed

4 files changed

+103
-86
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Description: The dada2 package infers exact amplicon sequence variants (ASVs) fr
88
removing substitution and chimera errors. Taxonomic classification is available
99
via a native implementation of the RDP naive Bayesian classifier, and species-level
1010
assignment to 16S rRNA gene fragments by exact matching.
11-
Version: 1.12.0
11+
Version: 1.12.1
1212
Date: 2019-04-22
1313
Maintainer: Benjamin Callahan <benjamin.j.callahan@gmail.com>
1414
Author: Benjamin Callahan <benjamin.j.callahan@gmail.com>, Paul McMurdie, Susan Holmes

R/dada.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ assign("PSEUDO_ABUNDANCE", Inf, envir=dada_opts)
3434
#' If dada is run in selfConsist=TRUE mode, the algorithm will infer both the sample composition and
3535
#' the parameters of its error model from the data.
3636
#'
37-
#' @param derep (Required). A \code{\link{derep-class}} object, the output of \code{\link{derepFastq}}.
38-
#' A list of such objects can be provided, in which case each will be denoised with a shared error model.
37+
#' @param derep (Required). The file path(s) to the fastq or fastq.gz file(s), or any file format supported
38+
#' by \code{\link[ShortRead]{FastqStreamer}}, corresponding to the samples to be denoised.
39+
#' A \code{\link{derep-class}} object (or list thereof) returned by \code{link{derepFastq}} can also be provided
40+
#' If multiple samples are provided, each will be denoised with a shared error model.
3941
#'
4042
#' @param err (Required). 16xN numeric matrix, or an object coercible by \code{\link{getErrors}}
4143
#' such as the output of the \code{\link{learnErrors}} function.

R/filter.R

Lines changed: 94 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -97,101 +97,114 @@ removePrimers <- function(fn, fout,
9797
if(any(fout %in% fn)) stop("Output files must be distinct from the input files.")
9898
# Check and enforce primers
9999
if(!is.character(primer.fwd)) stop("Primer sequences must be provided as base R strings.")
100-
if(!is.null(primer.rev)) {
100+
if(is.null(primer.rev)) {
101+
has.rev <- FALSE
102+
} else {
101103
has.rev <- TRUE
102104
if(!is.character(primer.rev)) stop("Primer sequences must be provided as base R strings.")
103105
}
104106
fixed.fwd <- C_isACGT(primer.fwd)
105107
if(has.rev) fixed.rev <- C_isACGT(primer.rev)
106-
# Read in file
107-
fq <- readFastq(fn)
108-
inseqs <- length(fq)
109-
# Match patterns
110-
match.fwd <- as(vmatchPattern(primer.fwd, sread(fq), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.fwd), "list")
111-
if(has.rev) {
112-
match.rev <- as(vmatchPattern(primer.rev, sread(fq), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.rev), "list")
113-
}
114-
# If orient, match reverse complement as well
115-
if(orient) {
116-
fq.rc <- reverseComplement(fq)
117-
match.fwd.rc <- as(vmatchPattern(primer.fwd, sread(fq.rc), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.fwd), "list")
108+
rval <- matrix(0L, nrow=length(fn), ncol=2)
109+
colnames(rval) <- c("reads.in", "reads.out")
110+
rownames(rval) <- basename(fn)
111+
for(i in seq_along(fn)) {
112+
# Read in file
113+
fq <- readFastq(fn[[i]])
114+
inseqs <- length(fq)
115+
# Match patterns
116+
match.fwd <- as(vmatchPattern(primer.fwd, sread(fq), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.fwd), "list")
118117
if(has.rev) {
119-
match.rev.rc <- as(vmatchPattern(primer.rev, sread(fq.rc), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.rev), "list")
118+
match.rev <- as(vmatchPattern(primer.rev, sread(fq), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.rev), "list")
120119
}
121-
}
122-
# Tally up hits
123-
# Check for possible mis-oriented primer sequences?
124-
hits.fwd <- sapply(match.fwd, length)
125-
if(has.rev) hits.rev <- sapply(match.rev, length)
126-
if(!require.fwd) stop("Currently, only require.fwd=TRUE is supported.")
127-
if(has.rev && !require.rev) stop("Currently, only require.rev=TRUE is supported when a reverse primer sequence is provided.")
128-
if(require.fwd && sum(hits.fwd) == 0) stop("No sequences matched the provided forward primer sequence.")
129-
if(has.rev && require.rev && sum(hits.rev) == 0) stop("Reverse primer sequences were provided, but no sequences matched the provided reverse primer sequence.")
130-
if(any(hits.fwd>1) || (has.rev && any(hits.rev>1))) {
131-
if(verbose) message("Multiple matches to the primer(s) in some sequences. Using the longest possible match.")
132-
match.fwd[hits.fwd>1] <- sapply(match.fwd[hits.fwd>1], `[`, 1)
133-
if(has.rev) match.rev[hits.rev>1] <- sapply(match.rev[hits.rev>1], function(x) rev(x)[1])
134-
}
135-
if(orient) {
136-
hits.fwd.rc <- sapply(match.fwd.rc, length)
137-
if(has.rev) hits.rev.rc <- sapply(match.rev.rc, length)
138-
if(any(hits.fwd.rc>1) || (has.rev && any(hits.rev.rc>1))) {
139-
if(verbose) message("Multiple matches to the primer(s) in some reverse-complement sequences. Using the longest possible match.")
140-
match.fwd.rc[hits.fwd.rc>1] <- sapply(match.fwd.rc[hits.fwd.rc>1], `[`, 1)
141-
match.rev.rc[hits.rev.rc>1] <- sapply(match.rev.rc[hits.rev.rc>1], function(x) rev(x)[1])
120+
# If orient, match reverse complement as well
121+
if(orient) {
122+
fq.rc <- reverseComplement(fq)
123+
match.fwd.rc <- as(vmatchPattern(primer.fwd, sread(fq.rc), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.fwd), "list")
124+
if(has.rev) {
125+
match.rev.rc <- as(vmatchPattern(primer.rev, sread(fq.rc), max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.rev), "list")
126+
}
142127
}
143-
}
144-
# If orient, replace non-matches with rc matches where they exist
145-
if(orient) {
146-
flip <- !hits.fwd & hits.fwd.rc
147-
if(any(flip) && verbose) cat(sum(flip), "sequences out of", length(flip), "are being reverse-complemented.\n")
148-
fq[flip] <- fq.rc[flip]
149-
match.fwd[flip] <- match.fwd.rc[flip]
128+
# Tally up hits
129+
# Check for possible mis-oriented primer sequences?
150130
hits.fwd <- sapply(match.fwd, length)
151-
if(has.rev) {
152-
match.rev[flip] <- match.rev.rc[flip]
153-
hits.rev <- sapply(match.rev, length)
131+
if(has.rev) hits.rev <- sapply(match.rev, length)
132+
if(!require.fwd) stop("Currently, only require.fwd=TRUE is supported.")
133+
if(has.rev && !require.rev) stop("Currently, only require.rev=TRUE is supported when a reverse primer sequence is provided.")
134+
if(require.fwd && sum(hits.fwd) == 0) stop("No sequences matched the provided forward primer sequence.")
135+
if(has.rev && require.rev && sum(hits.rev) == 0) stop("Reverse primer sequences were provided, but no sequences matched the provided reverse primer sequence.")
136+
if(any(hits.fwd>1) || (has.rev && any(hits.rev>1))) {
137+
if(verbose) message("Multiple matches to the primer(s) in some sequences. Using the longest possible match.")
138+
match.fwd[hits.fwd>1] <- sapply(match.fwd[hits.fwd>1], `[`, 1)
139+
if(has.rev) match.rev[hits.rev>1] <- sapply(match.rev[hits.rev>1], function(x) rev(x)[1])
154140
}
155-
}
156-
# If require, remove sequences w/o forward and reverse hits
157-
keep <- rep(TRUE, length(fq))
158-
if(require.fwd) keep <- keep & (hits.fwd > 0)
159-
if(has.rev && require.rev) keep <- keep & (hits.rev > 0)
160-
if(!all(keep)) {
161-
fq <- fq[keep]
162-
match.fwd <- match.fwd[keep]
163-
if(has.rev) match.rev <- match.rev[keep]
164-
}
165-
# If trim, narrow to the desired subsequence
166-
if(trim.fwd) {
167-
first <- sapply(match.fwd, end) + 1
168-
} else {
169-
first <- rep(1L, length(fq))
170-
}
171-
if(has.rev && trim.rev) {
172-
last <- sapply(match.rev, start) - 1
173-
} else {
174-
last <- width(fq)
175-
}
176-
keep <- last > first
177-
if(!all(keep)) first <- first[keep]; last <- last[keep]; fq <- fq[keep]
178-
fq <- narrow(fq, first, last) # Need to handle zero case gracefully, w/ informative error
179-
# Delete fout if it already exists (since writeFastq doesn't overwrite)
180-
if(file.exists(fout)) {
181-
if(file.remove(fout)) {
182-
if(verbose) message("Overwriting file:", fout)
141+
if(orient) {
142+
hits.fwd.rc <- sapply(match.fwd.rc, length)
143+
if(has.rev) hits.rev.rc <- sapply(match.rev.rc, length)
144+
if(any(hits.fwd.rc>1) || (has.rev && any(hits.rev.rc>1))) {
145+
if(verbose) message("Multiple matches to the primer(s) in some reverse-complement sequences. Using the longest possible match.")
146+
match.fwd.rc[hits.fwd.rc>1] <- sapply(match.fwd.rc[hits.fwd.rc>1], `[`, 1)
147+
if(has.rev) match.rev.rc[hits.rev.rc>1] <- sapply(match.rev.rc[hits.rev.rc>1], function(x) rev(x)[1])
148+
}
149+
}
150+
# If orient, replace non-matches with rc matches where they exist
151+
if(orient) {
152+
flip <- !hits.fwd & hits.fwd.rc
153+
if(any(flip) && verbose) cat(sum(flip), "sequences out of", length(flip), "are being reverse-complemented.\n")
154+
fq[flip] <- fq.rc[flip]
155+
match.fwd[flip] <- match.fwd.rc[flip]
156+
hits.fwd <- sapply(match.fwd, length)
157+
if(has.rev) {
158+
match.rev[flip] <- match.rev.rc[flip]
159+
hits.rev <- sapply(match.rev, length)
160+
}
161+
}
162+
# If require, remove sequences w/o forward and reverse hits
163+
keep <- rep(TRUE, length(fq))
164+
if(require.fwd) keep <- keep & (hits.fwd > 0)
165+
if(has.rev && require.rev) keep <- keep & (hits.rev > 0)
166+
if(!all(keep)) {
167+
fq <- fq[keep]
168+
match.fwd <- match.fwd[keep]
169+
if(has.rev) match.rev <- match.rev[keep]
170+
}
171+
# If trim, narrow to the desired subsequence
172+
if(trim.fwd) {
173+
first <- sapply(match.fwd, end) + 1
183174
} else {
184-
stop("Failed to overwrite file:", fout)
175+
first <- rep(1L, length(fq))
176+
}
177+
if(has.rev && trim.rev) {
178+
last <- sapply(match.rev, start) - 1
179+
} else {
180+
last <- width(fq)
181+
}
182+
keep <- last > first
183+
if(!all(keep)) first <- first[keep]; last <- last[keep]; fq <- fq[keep]
184+
fq <- narrow(fq, first, last) # Need to handle zero case gracefully, w/ informative error
185+
# Delete fout if it already exists (since writeFastq doesn't overwrite)
186+
if(file.exists(fout[[i]])) {
187+
if(file.remove(fout[[i]])) {
188+
if(verbose) message("Overwriting file:", fout[[i]])
189+
} else {
190+
stop("Failed to overwrite file:", fout[[i]])
191+
}
192+
}
193+
writeFastq(fq, fout[[i]], "w", compress=compress)
194+
outseqs <- length(fq)
195+
rval[i,c("reads.in", "reads.out")] <- c(inseqs, outseqs)
196+
if(verbose) {
197+
outperc <- round(outseqs * 100 / inseqs, 1)
198+
outperc <- paste(" (", outperc, "%)", sep="")
199+
message("Read in ", inseqs, ", output ", outseqs, outperc, " filtered sequences.", sep="")
185200
}
186201
}
187-
writeFastq(fq, fout, "w", compress=compress)
188-
outseqs <- length(fq)
189-
if(verbose) {
190-
outperc <- round(outseqs * 100 / inseqs, 1)
191-
outperc <- paste(" (", outperc, "%)", sep="")
192-
message("Read in ", inseqs, ", output ", outseqs, outperc, " filtered sequences.", sep="")
202+
if(all(rval[,"reads.out"]==0)) {
203+
warning("No reads passed the primer detection.")
204+
} else if(any(rval[,"reads.out"]==0)) {
205+
message("Some input samples had no reads pass the primer detection.")
193206
}
194-
return(invisible(c(reads.in=inseqs, reads.out=outseqs)))
207+
return(invisible(rval))
195208
}
196209

197210
#' Filter and trim fastq file(s).

man/dada.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)