@@ -97,101 +97,114 @@ removePrimers <- function(fn, fout,
9797 if (any(fout %in% fn )) stop(" Output files must be distinct from the input files." )
9898 # Check and enforce primers
9999 if (! is.character(primer.fwd )) stop(" Primer sequences must be provided as base R strings." )
100- if (! is.null(primer.rev )) {
100+ if (is.null(primer.rev )) {
101+ has.rev <- FALSE
102+ } else {
101103 has.rev <- TRUE
102104 if (! is.character(primer.rev )) stop(" Primer sequences must be provided as base R strings." )
103105 }
104106 fixed.fwd <- C_isACGT(primer.fwd )
105107 if (has.rev ) fixed.rev <- C_isACGT(primer.rev )
106- # Read in file
107- fq <- readFastq(fn )
108- inseqs <- length(fq )
109- # Match patterns
110- match.fwd <- as(vmatchPattern(primer.fwd , sread(fq ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.fwd ), " list" )
111- if (has.rev ) {
112- match.rev <- as(vmatchPattern(primer.rev , sread(fq ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.rev ), " list" )
113- }
114- # If orient, match reverse complement as well
115- if (orient ) {
116- fq.rc <- reverseComplement(fq )
117- match.fwd.rc <- as(vmatchPattern(primer.fwd , sread(fq.rc ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.fwd ), " list" )
108+ rval <- matrix (0L , nrow = length(fn ), ncol = 2 )
109+ colnames(rval ) <- c(" reads.in" , " reads.out" )
110+ rownames(rval ) <- basename(fn )
111+ for (i in seq_along(fn )) {
112+ # Read in file
113+ fq <- readFastq(fn [[i ]])
114+ inseqs <- length(fq )
115+ # Match patterns
116+ match.fwd <- as(vmatchPattern(primer.fwd , sread(fq ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.fwd ), " list" )
118117 if (has.rev ) {
119- match.rev.rc <- as(vmatchPattern(primer.rev , sread(fq.rc ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.rev ), " list" )
118+ match.rev <- as(vmatchPattern(primer.rev , sread(fq ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.rev ), " list" )
120119 }
121- }
122- # Tally up hits
123- # Check for possible mis-oriented primer sequences?
124- hits.fwd <- sapply(match.fwd , length )
125- if (has.rev ) hits.rev <- sapply(match.rev , length )
126- if (! require.fwd ) stop(" Currently, only require.fwd=TRUE is supported." )
127- if (has.rev && ! require.rev ) stop(" Currently, only require.rev=TRUE is supported when a reverse primer sequence is provided." )
128- if (require.fwd && sum(hits.fwd ) == 0 ) stop(" No sequences matched the provided forward primer sequence." )
129- if (has.rev && require.rev && sum(hits.rev ) == 0 ) stop(" Reverse primer sequences were provided, but no sequences matched the provided reverse primer sequence." )
130- if (any(hits.fwd > 1 ) || (has.rev && any(hits.rev > 1 ))) {
131- if (verbose ) message(" Multiple matches to the primer(s) in some sequences. Using the longest possible match." )
132- match.fwd [hits.fwd > 1 ] <- sapply(match.fwd [hits.fwd > 1 ], `[` , 1 )
133- if (has.rev ) match.rev [hits.rev > 1 ] <- sapply(match.rev [hits.rev > 1 ], function (x ) rev(x )[1 ])
134- }
135- if (orient ) {
136- hits.fwd.rc <- sapply(match.fwd.rc , length )
137- if (has.rev ) hits.rev.rc <- sapply(match.rev.rc , length )
138- if (any(hits.fwd.rc > 1 ) || (has.rev && any(hits.rev.rc > 1 ))) {
139- if (verbose ) message(" Multiple matches to the primer(s) in some reverse-complement sequences. Using the longest possible match." )
140- match.fwd.rc [hits.fwd.rc > 1 ] <- sapply(match.fwd.rc [hits.fwd.rc > 1 ], `[` , 1 )
141- match.rev.rc [hits.rev.rc > 1 ] <- sapply(match.rev.rc [hits.rev.rc > 1 ], function (x ) rev(x )[1 ])
120+ # If orient, match reverse complement as well
121+ if (orient ) {
122+ fq.rc <- reverseComplement(fq )
123+ match.fwd.rc <- as(vmatchPattern(primer.fwd , sread(fq.rc ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.fwd ), " list" )
124+ if (has.rev ) {
125+ match.rev.rc <- as(vmatchPattern(primer.rev , sread(fq.rc ), max.mismatch = max.mismatch , with.indels = allow.indels , fixed = fixed.rev ), " list" )
126+ }
142127 }
143- }
144- # If orient, replace non-matches with rc matches where they exist
145- if (orient ) {
146- flip <- ! hits.fwd & hits.fwd.rc
147- if (any(flip ) && verbose ) cat(sum(flip ), " sequences out of" , length(flip ), " are being reverse-complemented.\n " )
148- fq [flip ] <- fq.rc [flip ]
149- match.fwd [flip ] <- match.fwd.rc [flip ]
128+ # Tally up hits
129+ # Check for possible mis-oriented primer sequences?
150130 hits.fwd <- sapply(match.fwd , length )
151- if (has.rev ) {
152- match.rev [flip ] <- match.rev.rc [flip ]
153- hits.rev <- sapply(match.rev , length )
131+ if (has.rev ) hits.rev <- sapply(match.rev , length )
132+ if (! require.fwd ) stop(" Currently, only require.fwd=TRUE is supported." )
133+ if (has.rev && ! require.rev ) stop(" Currently, only require.rev=TRUE is supported when a reverse primer sequence is provided." )
134+ if (require.fwd && sum(hits.fwd ) == 0 ) stop(" No sequences matched the provided forward primer sequence." )
135+ if (has.rev && require.rev && sum(hits.rev ) == 0 ) stop(" Reverse primer sequences were provided, but no sequences matched the provided reverse primer sequence." )
136+ if (any(hits.fwd > 1 ) || (has.rev && any(hits.rev > 1 ))) {
137+ if (verbose ) message(" Multiple matches to the primer(s) in some sequences. Using the longest possible match." )
138+ match.fwd [hits.fwd > 1 ] <- sapply(match.fwd [hits.fwd > 1 ], `[` , 1 )
139+ if (has.rev ) match.rev [hits.rev > 1 ] <- sapply(match.rev [hits.rev > 1 ], function (x ) rev(x )[1 ])
154140 }
155- }
156- # If require, remove sequences w/o forward and reverse hits
157- keep <- rep(TRUE , length(fq ))
158- if (require.fwd ) keep <- keep & (hits.fwd > 0 )
159- if (has.rev && require.rev ) keep <- keep & (hits.rev > 0 )
160- if (! all(keep )) {
161- fq <- fq [keep ]
162- match.fwd <- match.fwd [keep ]
163- if (has.rev ) match.rev <- match.rev [keep ]
164- }
165- # If trim, narrow to the desired subsequence
166- if (trim.fwd ) {
167- first <- sapply(match.fwd , end ) + 1
168- } else {
169- first <- rep(1L , length(fq ))
170- }
171- if (has.rev && trim.rev ) {
172- last <- sapply(match.rev , start ) - 1
173- } else {
174- last <- width(fq )
175- }
176- keep <- last > first
177- if (! all(keep )) first <- first [keep ]; last <- last [keep ]; fq <- fq [keep ]
178- fq <- narrow(fq , first , last ) # Need to handle zero case gracefully, w/ informative error
179- # Delete fout if it already exists (since writeFastq doesn't overwrite)
180- if (file.exists(fout )) {
181- if (file.remove(fout )) {
182- if (verbose ) message(" Overwriting file:" , fout )
141+ if (orient ) {
142+ hits.fwd.rc <- sapply(match.fwd.rc , length )
143+ if (has.rev ) hits.rev.rc <- sapply(match.rev.rc , length )
144+ if (any(hits.fwd.rc > 1 ) || (has.rev && any(hits.rev.rc > 1 ))) {
145+ if (verbose ) message(" Multiple matches to the primer(s) in some reverse-complement sequences. Using the longest possible match." )
146+ match.fwd.rc [hits.fwd.rc > 1 ] <- sapply(match.fwd.rc [hits.fwd.rc > 1 ], `[` , 1 )
147+ if (has.rev ) match.rev.rc [hits.rev.rc > 1 ] <- sapply(match.rev.rc [hits.rev.rc > 1 ], function (x ) rev(x )[1 ])
148+ }
149+ }
150+ # If orient, replace non-matches with rc matches where they exist
151+ if (orient ) {
152+ flip <- ! hits.fwd & hits.fwd.rc
153+ if (any(flip ) && verbose ) cat(sum(flip ), " sequences out of" , length(flip ), " are being reverse-complemented.\n " )
154+ fq [flip ] <- fq.rc [flip ]
155+ match.fwd [flip ] <- match.fwd.rc [flip ]
156+ hits.fwd <- sapply(match.fwd , length )
157+ if (has.rev ) {
158+ match.rev [flip ] <- match.rev.rc [flip ]
159+ hits.rev <- sapply(match.rev , length )
160+ }
161+ }
162+ # If require, remove sequences w/o forward and reverse hits
163+ keep <- rep(TRUE , length(fq ))
164+ if (require.fwd ) keep <- keep & (hits.fwd > 0 )
165+ if (has.rev && require.rev ) keep <- keep & (hits.rev > 0 )
166+ if (! all(keep )) {
167+ fq <- fq [keep ]
168+ match.fwd <- match.fwd [keep ]
169+ if (has.rev ) match.rev <- match.rev [keep ]
170+ }
171+ # If trim, narrow to the desired subsequence
172+ if (trim.fwd ) {
173+ first <- sapply(match.fwd , end ) + 1
183174 } else {
184- stop(" Failed to overwrite file:" , fout )
175+ first <- rep(1L , length(fq ))
176+ }
177+ if (has.rev && trim.rev ) {
178+ last <- sapply(match.rev , start ) - 1
179+ } else {
180+ last <- width(fq )
181+ }
182+ keep <- last > first
183+ if (! all(keep )) first <- first [keep ]; last <- last [keep ]; fq <- fq [keep ]
184+ fq <- narrow(fq , first , last ) # Need to handle zero case gracefully, w/ informative error
185+ # Delete fout if it already exists (since writeFastq doesn't overwrite)
186+ if (file.exists(fout [[i ]])) {
187+ if (file.remove(fout [[i ]])) {
188+ if (verbose ) message(" Overwriting file:" , fout [[i ]])
189+ } else {
190+ stop(" Failed to overwrite file:" , fout [[i ]])
191+ }
192+ }
193+ writeFastq(fq , fout [[i ]], " w" , compress = compress )
194+ outseqs <- length(fq )
195+ rval [i ,c(" reads.in" , " reads.out" )] <- c(inseqs , outseqs )
196+ if (verbose ) {
197+ outperc <- round(outseqs * 100 / inseqs , 1 )
198+ outperc <- paste(" (" , outperc , " %)" , sep = " " )
199+ message(" Read in " , inseqs , " , output " , outseqs , outperc , " filtered sequences." , sep = " " )
185200 }
186201 }
187- writeFastq(fq , fout , " w" , compress = compress )
188- outseqs <- length(fq )
189- if (verbose ) {
190- outperc <- round(outseqs * 100 / inseqs , 1 )
191- outperc <- paste(" (" , outperc , " %)" , sep = " " )
192- message(" Read in " , inseqs , " , output " , outseqs , outperc , " filtered sequences." , sep = " " )
202+ if (all(rval [," reads.out" ]== 0 )) {
203+ warning(" No reads passed the primer detection." )
204+ } else if (any(rval [," reads.out" ]== 0 )) {
205+ message(" Some input samples had no reads pass the primer detection." )
193206 }
194- return (invisible (c( reads.in = inseqs , reads.out = outseqs ) ))
207+ return (invisible (rval ))
195208}
196209
197210# ' Filter and trim fastq file(s).
0 commit comments