Skip to content

Commit d4eba34

Browse files
authored
Merge pull request #17 from nimble-dev/Nmix_efficient
Add fix for rounding errors in extreme edge cases w many replicates
2 parents 355f63d + d00abeb commit d4eba34

File tree

3 files changed

+114
-81
lines changed

3 files changed

+114
-81
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
Package: nimbleEcology
22
Type: Package
33
Title: Distributions for Ecological Models in 'nimble'
4-
Version: 0.3.0
4+
Version: 0.3.1
55
Maintainer: Benjamin R. Goldstein <ben.goldstein@berkeley.edu>
66
Authors@R: c(person("Benjamin R.", "Goldstein", role = c("aut", "cre"),
77
email = "ben.goldstein@berkeley.edu"),
88
person("Daniel", "Turek", role = "aut"),
99
person("Lauren", "Ponisio", role = "aut"),
1010
person("Perry", "de Valpine", role = "aut"))
11-
Date: 2020-05-27
11+
Date: 2020-09-25
1212
Description: Common ecological distributions for 'nimble' models in the form of nimbleFunction objects.
1313
Includes Cormack-Jolly-Seber, occupancy, dynamic occupancy, hidden Markov, dynamic hidden Markov, and N-mixture models.
1414
(Jolly (1965) <DOI: 10.2307/2333826>, Seber (1965) <DOI: 10.2307/2333827>, Turek et al. (2016) <doi:10.1007/s10651-016-0353-z>).

R/dNmixture.R

Lines changed: 107 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -159,56 +159,70 @@ dNmixture_v <- nimbleFunction(
159159
Nmax = double(0, default = -1),
160160
len = double(),
161161
log = integer(0, default = 0)) {
162-
if (length(x) != len) stop("in dNmixture_v, len must equal length(x).")
163-
if (length(prob) != len) stop("in dNmixture_v, len must equal length(prob).")
162+
if (length(x) != len) stop("in dNmixture_v, len must equal length(x).")
163+
if (len != length(prob)) stop("in dNmixture_v, len must equal length(prob).")
164164

165-
# Lambda cannot be negative
166-
if (lambda < 0) {
167-
if (log) return(-Inf)
168-
else return(0)
169-
}
165+
# Lambda cannot be negative
166+
if (lambda < 0) {
167+
if (log) return(-Inf)
168+
else return(0)
169+
}
170170

171-
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
172-
## We determine the lowest N and highest N at extreme quantiles and sum over those.
173-
if (Nmin == -1) {
174-
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
175-
}
176-
if (Nmax == -1) {
177-
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
178-
}
179-
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
171+
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
172+
## We determine the lowest N and highest N at extreme quantiles and sum over those.
173+
if (Nmin == -1) {
174+
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
175+
}
176+
if (Nmax == -1) {
177+
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
178+
}
179+
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
180180

181-
logProb <- -Inf
181+
logProb <- -Inf
182182

183-
if (Nmax > Nmin) {
184-
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
185-
prods <- rep(0, numN)
186-
for (i in (Nmin + 1):Nmax) {
187-
prods[i - Nmin] <- prod(i/(i - x)) / i
188-
}
183+
if (Nmax > Nmin) {
184+
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
185+
prods <- rep(0, numN)
186+
for (i in (Nmin + 1):Nmax) {
187+
prods[i - Nmin] <- prod(i/(i - x)) / i
188+
}
189189

190-
ff <- log(lambda) + log(prod(1-prob)) + log(prods)
191-
ff_g1 <- ff[ff > 0] # largest element is the length(ff_g1)th product
192-
max_index <- length(ff_g1)
190+
ff <- log(lambda) + sum(log(1-prob)) + log(prods)
191+
i <- 1
192+
sum_ff_g1 <- 0
193+
while(i < numN & ff[i] > 0) {
194+
sum_ff_g1 <- sum_ff_g1 + ff[i]
195+
i <- i+1
196+
}
197+
max_index <- i-1
198+
if(ff[i] > 0) max_index <- i
199+
if(max_index == 0) max_index <- 1 # not sure this is relevant. it's defensive.
193200

194-
terms <- rep(0, numN + 1)
195-
terms[max_index + 1] <- 1
201+
terms <- numeric(numN + 1)
202+
terms[max_index + 1] <- 1
196203

197-
for (i in 1:max_index) {
198-
terms[i] <- 1 / exp(sum(ff[i:max_index]))
199-
}
200-
for (i in (max_index + 1):numN) {
201-
terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
202-
}
204+
sumff <- sum_ff_g1 ## should be the same as sum(ff[1:max_index])
203205

204-
log_fac <- sum(ff_g1) + log(sum(terms)) # Final factor is the largest term * (all factors / largest term)
206+
for (i in 1:max_index) {
207+
# terms[i] <- 1 / exp(sum(ff[i:max_index]))
208+
terms[i] <- 1 / exp(sumff)
209+
sumff <- sumff - ff[i]
210+
}
205211

206-
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
212+
sumff <- 0
213+
for (i in (max_index + 1):numN) {
214+
# terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
215+
sumff <- sumff + ff[i]
216+
terms[i + 1] <- exp(sumff)
207217
}
208-
if (log) return(logProb)
209-
else return(exp(logProb))
210-
returnType(double())
211-
})
218+
219+
log_fac <- sum_ff_g1 + log(sum(terms)) # Final factor is the largest term * (all factors / largest term) }
220+
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
221+
}
222+
if (log) return(logProb)
223+
else return(exp(logProb))
224+
returnType(double())
225+
})
212226

213227
NULL
214228
#' @rdname dNmixture
@@ -221,55 +235,69 @@ dNmixture_s <- nimbleFunction(
221235
Nmax = double(0, default = -1),
222236
len = double(),
223237
log = integer(0, default = 0)) {
224-
if (length(x) != len) stop("in dNmixture_s, len must equal length(x).")
238+
if (length(x) != len) stop("in dNmixture_s, len must equal length(x).")
225239

226-
# Lambda cannot be negative
227-
if (lambda < 0) {
228-
if (log) return(-Inf)
229-
else return(0)
230-
}
240+
# Lambda cannot be negative
241+
if (lambda < 0) {
242+
if (log) return(-Inf)
243+
else return(0)
244+
}
231245

232-
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
233-
## We determine the lowest N and highest N at extreme quantiles and sum over those.
234-
if (Nmin == -1) {
235-
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
236-
}
237-
if (Nmax == -1) {
238-
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
239-
}
240-
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
246+
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
247+
## We determine the lowest N and highest N at extreme quantiles and sum over those.
248+
if (Nmin == -1) {
249+
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
250+
}
251+
if (Nmax == -1) {
252+
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
253+
}
254+
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
241255

242-
logProb <- -Inf
256+
logProb <- -Inf
243257

244-
if (Nmax > Nmin) {
245-
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
246-
prods <- rep(0, numN)
247-
for (i in (Nmin + 1):Nmax) {
248-
prods[i - Nmin] <- prod(i/(i - x)) / i
249-
}
258+
if (Nmax > Nmin) {
259+
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
260+
prods <- rep(0, numN)
261+
for (i in (Nmin + 1):Nmax) {
262+
prods[i - Nmin] <- prod(i/(i - x)) / i
263+
}
250264

251-
ff <- log(lambda) + log(1-prob)*len + log(prods)
252-
ff_g1 <- ff[ff > 0] # largest element is the length(ff_g1)th product
253-
max_index <- length(ff_g1)
265+
ff <- log(lambda) + log(1-prob)*len + log(prods)
266+
i <- 1
267+
sum_ff_g1 <- 0
268+
while(i < numN & ff[i] > 0) {
269+
sum_ff_g1 <- sum_ff_g1 + ff[i]
270+
i <- i+1
271+
}
272+
max_index <- i-1
273+
if(ff[i] > 0) max_index <- i
274+
if(max_index == 0) max_index <- 1 # not sure this is relevant. it's defensive.
254275

255-
terms <- rep(0, numN + 1)
256-
terms[max_index + 1] <- 1
276+
terms <- numeric(numN + 1)
277+
terms[max_index + 1] <- 1
257278

258-
for (i in 1:max_index) {
259-
terms[i] <- 1 / exp(sum(ff[i:max_index]))
260-
}
261-
for (i in (max_index + 1):numN) {
262-
terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
263-
}
279+
sumff <- sum_ff_g1 ## should be the same as sum(ff[1:max_index])
264280

265-
log_fac <- sum(ff_g1) + log(sum(terms)) # Final factor is the largest term * (all factors / largest term)
281+
for (i in 1:max_index) {
282+
# terms[i] <- 1 / exp(sum(ff[i:max_index]))
283+
terms[i] <- 1 / exp(sumff)
284+
sumff <- sumff - ff[i]
285+
}
266286

267-
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
287+
sumff <- 0
288+
for (i in (max_index + 1):numN) {
289+
# terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
290+
sumff <- sumff + ff[i]
291+
terms[i + 1] <- exp(sumff)
268292
}
269-
if (log) return(logProb)
270-
else return(exp(logProb))
271-
returnType(double())
272-
})
293+
294+
log_fac <- sum_ff_g1 + log(sum(terms)) # Final factor is the largest term * (all factors / largest term) }
295+
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
296+
}
297+
if (log) return(logProb)
298+
else return(exp(logProb))
299+
returnType(double())
300+
})
273301

274302
NULL
275303
#' @rdname dNmixture

inst/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
Release notes for each version of nimbleEcologyy
22

3+
0.3.1
4+
-- Fixed rounding errors in edge cases of dNmixture with many observations.
5+
Thanks to NIMBLE developer Chris Paciorek for the algorithm.
6+
7+
38
0.3.0
49

510
-- Fixed an internal problem with dHMM and dDHMM resulting in a mismatch between documentation and internal behavior

0 commit comments

Comments
 (0)