Skip to content

Commit bb7795e

Browse files
authored
Updated GoF test
Myllymärki et al., 2015; Mrkvicka et al., 2017.
2 parents 604b511 + ca15dc7 commit bb7795e

File tree

4 files changed

+164
-29
lines changed

4 files changed

+164
-29
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ importFrom("RcppParallel", "RcppParallelLibs")
99
importFrom("rlang", ".data")
1010
importFrom("spatstat.geom", "marks")
1111
importFrom("doFuture", "%dofuture%")
12+
importFrom("stats", "quantile", "sd")
1213

1314
export("as.Dtable")
1415
export("as.wmppp")

R/GoFtest.R

Lines changed: 105 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,36 @@
1-
GoFtest <- function(Envelope) {
1+
GoFtest <- function(
2+
Envelope,
3+
Test = "DCLF",
4+
Scaling = "Asymmetric",
5+
Range = NULL) {
26

37
# Verify Envelope
48
if (!inherits(Envelope, "envelope")) {
59
stop("Envelope is not of class envelope")
610
}
11+
# Verify Scaling
12+
if (!is.character(Scaling) | !is.vector(Scaling) | !length(Scaling) == 1) {
13+
stop("Argument 'Scaling' must be a character vector of length one")
14+
}
15+
if (!(Scaling %in% c("Quantile", "Studentized", "Asymmetric", "None"))) {
16+
stop("Invalid argument: 'Scaling'. Accepted arguments are: Quantile,
17+
Studentized, Asymmetric, None.")
18+
}
19+
# Verify Test
20+
if (!is.character(Test) | !is.vector(Test) | !length(Test) == 1) {
21+
stop("Argument 'Test' must be a character vector of length one")
22+
}
23+
if (!(Test %in% c("DCLF", "Integral", "MAD"))) {
24+
stop("Invalid argument: 'Test'. Accepted arguments are: DCLF, Integral,
25+
MAD.")
26+
}
27+
# Verify Range
28+
if (!is.null(Range) && (!is.vector(Range) |
29+
!is.numeric(Range) |
30+
length(Range) != 2)) {
31+
stop("Invalid argument: 'Range'. Accepted arguments are a numeric vector of length two,
32+
specifying the minimum and maximum distances over which to compute the test.")
33+
}
734
# Verify simulations
835
if (is.null(attr(Envelope, "simfuns"))) {
936
stop("Envelope does not contain simulations in its attribute simfuns")
@@ -13,32 +40,93 @@ GoFtest <- function(Envelope) {
1340
SimulatedValues <- as.data.frame(attr(Envelope, "simfuns"))[, -1]
1441
}
1542

43+
# Transform observed Ls into K for the test (L envelopes are constructed from
44+
# the K function)
45+
if (any(attr(Envelope, "fname") %in% c("L", "Lmm"))) {
46+
ActualValues <- (ActualValues + r)^2 * pi
47+
}
48+
49+
# Restrict analysis to chosen distance range
50+
if (!is.null(Range)) {
51+
if(min(Range) < max(r) && max(Range) > min(r)) {
52+
SelectedR <- (r > min(Range) & r < max(Range))
53+
ActualValues <- ActualValues[SelectedR]
54+
SimulatedValues <- SimulatedValues[SelectedR, ]
55+
r <- r[SelectedR]
56+
} else {
57+
warning("The selected range is outside the simulated distances.
58+
The test was computed using all distances from the envelope.")
59+
}
60+
}
61+
1662
NumberOfSimulations <- dim(SimulatedValues)[2]
1763
AverageSimulatedValues <- apply(SimulatedValues, 1, sum) / (NumberOfSimulations - 1)
18-
rIncrements <- (r - c(0,r)[seq_along(r)])[-1]
64+
rIncrements <- (r - c(0, r)[seq_along(r)])[-1]
65+
66+
# Calculate weights to scale residuals of the statistic
67+
Weights <- switch(Scaling,
68+
"Studentized" = 1 / apply(SimulatedValues, 1, sd, na.rm = T),
69+
"Quantile" = 1 / (apply(SimulatedValues, 1,
70+
quantile, probs = 0.975, na.rm = T) -
71+
apply(SimulatedValues, 1,
72+
quantile, probs = 0.025, na.rm = T)),
73+
"Asymmetric" = {
74+
Upper <- 1 / (apply(SimulatedValues, 1,
75+
quantile, probs = 0.975,na.rm = T) -
76+
AverageSimulatedValues)
77+
Lower <- 1 / (AverageSimulatedValues -
78+
apply(SimulatedValues, 1,
79+
quantile, probs = 0.025, na.rm = T))
80+
list(UprW = Upper, LwrW = Lower)
81+
},
82+
"None" = rep(1, length(r)))
1983

20-
# Ui calculate the statistic for a simulation
21-
Ui <- function(SimulationNumber) {
22-
Departure <- (SimulatedValues[, SimulationNumber] -
23-
AverageSimulatedValues)[seq_along(r) - 1]
24-
WeightedDeparture <- (Departure[!is.nan(Departure)])^2 *
25-
rIncrements[!is.nan(Departure)]
26-
return(sum(WeightedDeparture))
84+
# Ui calculate the statistic for one simulation
85+
Ui <- function(SimulationNumber, ValueToTest) {
86+
Departure <- (ValueToTest[, SimulationNumber] -
87+
AverageSimulatedValues)[seq_along(r) - 1]
88+
if (inherits(Weights, "list")) {
89+
ScaledDeparture <- sapply(seq_along(Departure),
90+
FUN= function(x) ifelse(Departure[x] >= 0,
91+
Departure[x] * Weights$UprW[x],
92+
Departure[x] * Weights$LwrW[x]))
93+
ScaledDeparture <- as.vector(ScaledDeparture)
94+
} else {
95+
ScaledDeparture <- Departure * Weights[seq_along(r) - 1]
96+
}
97+
GofStatistic <- switch(Test,
98+
"DCLF" =
99+
sum((ScaledDeparture[!is.nan(ScaledDeparture)])^2 *
100+
rIncrements[!is.nan(ScaledDeparture)],
101+
na.rm = T),
102+
"Integral" =
103+
sum(abs((ScaledDeparture[!is.nan(ScaledDeparture)])) *
104+
rIncrements[!is.nan(ScaledDeparture)],
105+
na.rm = T),
106+
"MAD" = max(abs(ScaledDeparture), na.rm = T))
107+
return(GofStatistic)
27108
}
28109

29110
# Calculate the Ui statistic for all simulations
30111
SimulatedU <- vapply(
31112
seq_len(NumberOfSimulations),
32113
FUN = Ui,
33-
FUN.VALUE = 0
114+
FUN.VALUE = 0,
115+
ValueToTest = SimulatedValues
34116
)
35117

36-
# Calculate the statistic for the actual value
37-
RecenteredValues <- (ActualValues - AverageSimulatedValues)[seq_along(r) - 1]
38-
WeightedRecenteredValues <- (RecenteredValues[!is.nan(RecenteredValues)])^2 *
39-
rIncrements[!is.nan(RecenteredValues)]
40-
ActualU <- sum(WeightedRecenteredValues)
118+
# Calculate the Ui statistic for the actual value
119+
ActualU <- vapply(
120+
1,
121+
FUN = Ui,
122+
FUN.VALUE = 0,
123+
ValueToTest = data.frame(ActualValues)
124+
)
41125

42-
# Return the rank
43-
return(mean(ActualU < SimulatedU))
126+
# Return the p_value. If the p_value is equal to 0, a conservative p_value
127+
# of 1/(n + 1) is returned.
128+
return(ifelse(mean(ActualU < SimulatedU) == 0,
129+
1 / (1 + NumberOfSimulations),
130+
mean(ActualU < SimulatedU)))
44131
}
132+

dbmss.Rproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Version: 1.0
2-
ProjectId: cd05f60b-7ee9-4c31-a1ff-518244563ba2
2+
ProjectId: 9bfc979a-9f1d-4b54-90ce-a8660d10f9b2
33

44
RestoreWorkspace: Default
55
SaveWorkspace: Default

man/GoFtest.Rd

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,75 @@
77
Calculates the risk to reject the null hypothesis erroneously, based on the distribution of the simulations.
88
}
99
\usage{
10-
GoFtest(Envelope)
10+
GoFtest(Envelope,
11+
Test = "DCLF",
12+
Scaling = "Asymmetric",
13+
Range = NULL)
1114
}
1215
\arguments{
1316
\item{Envelope}{
1417
An envelope object (\code{\link[spatstat.explore]{envelope}}) containing simulations in its \code{simfuns} attribute. It may be the result of any estimation function of the dbmss package or obtained by the \code{\link[spatstat.explore]{envelope}} function with argument \code{savefuns=TRUE}.
1518
}
19+
\item{Test}{
20+
A string specifying the method to summarize the deviation from the null hypothesis. The deviation may be summarized using
21+
"\emph{DCLF}": the integrated squared deviation is utilized, a Diggle-Cressie-Loosmore-Ford (DCLF) test is performed (default);
22+
"\emph{Integral}": the integrated absolute deviation is utilized;
23+
"\emph{MAD}": the Maximum Absolute Deviation (MAD) is utilized, a MAD test is perfomed.
24+
}
25+
\item{Scaling}{
26+
A string specifying the method to scale the residuals of the test. Scaling may be
27+
"\emph{Asymmetric}": the differences between the 2.5\% lower quantiles of simulations and the expected value is utilized to scale negative residuals, and the differences between the 2.5\% upper quantiles and the expected value is utilized to scale positive residuals (default);
28+
"\emph{Quantile}": ranges between the 2.5\% upper and 2.5\% lower quantiles is utilized to scale residuals, disregarding whether residuals are negative or positive;
29+
"\emph{Studentized}": the standard deviation of simulations is utilized;
30+
"\emph{None}": does not scale residuals.
31+
}
32+
\item{Range}{
33+
A vector of length two containing the minimum and the maximum distance over which to compute the test. If \code{NULL}, or if the selected range is outside the simulated distances, all distances in the \code{Envelope} argument are used.
34+
}
1635
}
1736
\details{
18-
This test was introduced by Diggle(1983) and extensively developped by Loosmore and Ford (2006) for \emph{K}, and applied to \emph{M} by Marcon et al. (2012).
37+
This function regroups multiple Goodness of Fit tests: the DCLF test (Diggle 1986, Cressie 1993, Loosmore & Ford 2006, Marcon et al. 2012, Myllymäki et al. 2015), the integrated deviation test (Diggle 1979), and the MAD test (Diggle 1979, Myllymäki et al. 2015).
38+
39+
Monte Carlo simulations assess how well observed distance-based measures of spatial structure align with expected measures under the null hypothesis, estimated here by the mean of simulations. For both observed and simulated measures, residualscalculated as the differences between observed and expected valuesare computed at each distance r.
40+
These residuals are then transformed into test-specific statistics \emph{u}: \code{Test = "MAD"} uses the maximum absolute residual; \code{Test = "Integral"}, and \code{Test = "DCLF"} use residuals to approximate the integrated deviation, and the integrated squared deviation.
41+
A rank test on \emph{u} evaluates the null hypothesis that the observed point pattern originates from the same point process as the simulations.
42+
43+
The unequal variance of the residuals at different intervals of r influences \emph{u} statistics, and the power of Goodness of Fit tests. Myllymäki et al. (2015) proposed to scale residuals using pointwise quantiles (\code{Scaling = "Asymmetric"}, and \code{Scaling = "Quantile"}), and pointwise standard deviations (\code{Scaling = "Studentized"}).
44+
Goodness of Fit tests are sensitive to the distance interval over which they are performed. It is recommended to choose the distance interval to test based on \emph{a priori} hypotheses (Wiegand & Moloney 2013, Baddeley et al. 2015).
1945
}
2046
\value{
2147
A p-value.
2248
}
2349
\references{
24-
Diggle, P. J. (1983). \emph{Statistical analysis of spatial point patterns}. Academic Press, London. 148 p.
25-
26-
Loosmore, N. B. and Ford, E. D. (2006). Statistical inference using the G or K point pattern spatial statistics. \emph{Ecology} 87(8): 1925-1931.
27-
28-
Marcon, E., F. Puech and S. Traissac (2012). Characterizing the relative spatial structure of point patterns. International \emph{Journal of Ecology} 2012(Article ID 619281): 11.
50+
Baddeley, A., Rubak, E., & Turner, R. (2015). Spatial Point Patterns: Methodology and Applications with R (0 ed.). Chapman and Hall/CRC. https://doi.org/10.1201/b19708
51+
52+
Cressie, N. A. C. (1993). Statistics for Spatial Data (1st ed.). Wiley. https://doi.org/10.1002/9781119115151
53+
54+
Diggle, P. J. (1979). On Parameter Estimation and Goodness-of-Fit Testing for Spatial Point Patterns. Biometrics, 35(1), 87. https://doi.org/10.2307/2529938
55+
56+
Diggle, P. J. (1986). Displaced amacrine cells in the retina of a rabbit: Analysis of a bivariate spatial point pattern. Journal of Neuroscience Methods, 18(12), 115125. https://doi.org/10.1016/0165-0270(86)90115-9
57+
58+
Loosmore, N. B., & Ford, E. D. (2006). Statistical inference using the G or K point pattern spatial statistics. Ecology, 87(8), 19251931. https://doi.org/10.1890/0012-9658(2006)87[1925:SIUTGO]2.0.CO;2
59+
60+
Marcon, E., Puech, F., & Traissac, S. (2012). Characterizing the Relative Spatial Structure of Point Patterns. International Journal of Ecology, 2012, 111. https://doi.org/10.1155/2012/619281
61+
62+
Myllymäki, M., & Mrkvička, T. (2024). GET: Global Envelopes in R. Journal of Statistical Software, 111(3). https://doi.org/10.18637/jss.v111.i03
63+
64+
Myllymäki, M., Grabarnik, P., Seijo, H., & Stoyan, D. (2015). Deviation test construction and power comparison for marked spatial point patterns. Spatial Statistics, 11, 1934. https://doi.org/10.1016/j.spasta.2014.11.004
65+
66+
Wiegand, T., & Moloney, K. A. (2013). Handbook of Spatial Point-Pattern Analysis in Ecology (0 ed.). Chapman and Hall/CRC. https://doi.org/10.1201/b16195
67+
2968
}
3069
\note{
31-
No support exists in the literature to apply the GoF test to non-cumulative functions (\emph{g}, \emph{Kd}...).
32-
3370
\code{\link{Ktest}} is a much better test (it does not rely on simulations) but it is limited to the \emph{K} function against complete spatial randomness (CSR) in a rectangle window.
71+
72+
This test is inspired from Myllymäki et al. (2015), and a similar function \code{deviation_test()} exists in the R package \emph{GET} (Myllymäki & Mrkvička, 2024)
3473
}
3574
\seealso{
3675
\code{\link{Ktest}}
3776
}
3877
\examples{
78+
3979
# Simulate a Matern (Neyman Scott) point pattern
4080
nclust <- function(x0, y0, radius, n) {
4181
return(runifdisc(n, radius, centre=c(x0, y0)))
@@ -50,6 +90,12 @@ Alpha <- .10
5090
Envelope <- KEnvelope(as.wmppp(X), r, NumberOfSimulations, Alpha)
5191
autoplot(Envelope, ./(pi*r^2) ~ r)
5292

53-
# GoF test. Power is correct if enough simulations are run (say >1000).
54-
paste("p-value =", GoFtest(Envelope))
93+
# DCLF test using asymmetric scaling.
94+
# Power is correct if enough simulations are run (say >1000).
95+
paste("p-value =", GoFtest(Envelope, Test = "DCLF",
96+
Scaling = "Asymmetric", Range = c(0.1, 0.2)))
97+
98+
# MAD test using asymmetric scaling.
99+
paste("p-value =", GoFtest(Envelope, Test = "MAD",
100+
Scaling = "Asymmetric", Range = c(0.1, 0.2)))
55101
}

0 commit comments

Comments
 (0)