|
51 | 51 | #' The order is then selected using the Akaike information criterion (AIC; default), |
52 | 52 | #' see the argument \code{k} to change the criterion. |
53 | 53 | #' \code{lag.max} of length 2 automatically sets \code{p.free = TRUE}. |
| 54 | +#' @param ic logical indicating whether the lags should be selected |
| 55 | +#' (\code{ic = TRUE}; default) or used as specified by \code{p} |
| 56 | +#' (\code{ic = FALSE}; then the arguments \code{p.free}, \code{k}, and \code{lag.max} are ignored). |
54 | 57 | #' @param k numeric scalar specifying the weight of the equivalent degrees of freedom part |
55 | 58 | #' in the AIC formula. Default \code{k = 2} corresponds to the traditional AIC. |
56 | 59 | #' Use \code{k = log(n)} to use the Bayesian information criterion instead |
@@ -124,6 +127,7 @@ causality_pred <- function(y, |
124 | 127 | p.free = FALSE, |
125 | 128 | lag.restrict = 0L, |
126 | 129 | lag.max = NULL, |
| 130 | + ic = TRUE, |
127 | 131 | k = 2, |
128 | 132 | B = 500L, |
129 | 133 | test = 0.3, |
@@ -168,64 +172,67 @@ causality_pred <- function(y, |
168 | 172 | n_test <- n - n_train |
169 | 173 |
|
170 | 174 | # Part 1: Estimate p on the training set if using an information criterion ---- |
171 | | - if (is.null(p) && is.null(lag.max)) { |
172 | | - stop("Please specify p or lag.max.") |
173 | | - } |
174 | | - if (!is.null(lag.max)) { # then select p |
175 | | - if (length(lag.max) == 2) { |
176 | | - p.free = TRUE |
177 | | - } else { |
178 | | - lag.max <- rep(lag.max[1], 2) |
179 | | - } |
180 | | - maxl <- max(lag.max) |
181 | | - if (lag.restrict >= lag.max[2]) { |
182 | | - warning("lag.restrict >= lag.max or lag.max[2]. Using lag.restrict = 0 instead.") |
183 | | - lag.restrict <- 0 |
184 | | - } |
185 | | - # Embed both X and Y up to maxl to have the results of the same length |
186 | | - if (lag.restrict > 0) { |
187 | | - lagX <- embed(y[1:n_train, cause], maxl + 1)[, -c(1:(lag.restrict + 1)), drop = FALSE] |
188 | | - } else { |
189 | | - lagX <- embed(y[1:n_train, cause], maxl + 1)[, -1, drop = FALSE] |
| 175 | + if (ic){ |
| 176 | + if (is.null(p) && is.null(lag.max)) { |
| 177 | + stop("Please specify p or lag.max.") |
190 | 178 | } |
191 | | - lagY <- embed(y[1:n_train, dep], maxl + 1) |
192 | | - # Information criterion for the model |
193 | | - if (p.free) { |
194 | | - best.ic <- Inf |
195 | | - for (p1 in 1:lag.max[1]) { |
196 | | - for (p2 in (lag.restrict + 1):lag.max[2]) { |
| 179 | + if (!is.null(lag.max)) { # then select p |
| 180 | + if (length(lag.max) == 2) { |
| 181 | + p.free = TRUE |
| 182 | + } else { |
| 183 | + lag.max <- rep(lag.max[1], 2) |
| 184 | + } |
| 185 | + maxl <- max(lag.max) |
| 186 | + if (lag.restrict >= lag.max[2]) { |
| 187 | + warning("lag.restrict >= lag.max or lag.max[2]. Using lag.restrict = 0 instead.") |
| 188 | + lag.restrict <- 0 |
| 189 | + } |
| 190 | + |
| 191 | + # Embed both X and Y up to maxl to have the results of the same length |
| 192 | + if (lag.restrict > 0) { |
| 193 | + lagX <- embed(y[1:n_train, cause], maxl + 1)[, -c(1:(lag.restrict + 1)), drop = FALSE] |
| 194 | + } else { |
| 195 | + lagX <- embed(y[1:n_train, cause], maxl + 1)[, -1, drop = FALSE] |
| 196 | + } |
| 197 | + lagY <- embed(y[1:n_train, dep], maxl + 1) |
| 198 | + # Information criterion for the model |
| 199 | + if (p.free) { |
| 200 | + best.ic <- Inf |
| 201 | + for (p1 in 1:lag.max[1]) { |
| 202 | + for (p2 in (lag.restrict + 1):lag.max[2]) { |
| 203 | + fit <- stats::lm.fit(x = cbind(1, |
| 204 | + lagY[, 2:(p1 + 1)], |
| 205 | + lagX[, 1:(p2 - lag.restrict), drop = FALSE]), |
| 206 | + y = lagY[, 1]) |
| 207 | + # see stats:::extractAIC.lm() but omit the scale option |
| 208 | + nfit <- length(fit$residuals) |
| 209 | + edf <- nfit - fit$df.residual |
| 210 | + RSS <- sum(fit$residuals^2, na.rm = TRUE) # stats:::deviance.lm(fit) |
| 211 | + dev <- nfit * log(RSS/nfit) |
| 212 | + fit.ic <- dev + k * edf |
| 213 | + if (fit.ic < best.ic) { |
| 214 | + best.ic <- fit.ic |
| 215 | + p <- c(p1, p2) |
| 216 | + } |
| 217 | + } |
| 218 | + } |
| 219 | + } else { |
| 220 | + IC <- sapply((lag.restrict + 1):lag.max[2], function(s) { |
197 | 221 | fit <- stats::lm.fit(x = cbind(1, |
198 | | - lagY[, 2:(p1 + 1)], |
199 | | - lagX[, 1:(p2 - lag.restrict), drop = FALSE]), |
| 222 | + lagY[, 2:(s + 1)], |
| 223 | + lagX[, 1:(s - lag.restrict), drop = FALSE]), |
200 | 224 | y = lagY[, 1]) |
201 | | - # see stats:::extractAIC.lm() but omit the scale option |
| 225 | + # see stats:::extractAIC.lm; but omit the scale option |
202 | 226 | nfit <- length(fit$residuals) |
203 | 227 | edf <- nfit - fit$df.residual |
204 | 228 | RSS <- sum(fit$residuals^2, na.rm = TRUE) # stats:::deviance.lm(fit) |
205 | 229 | dev <- nfit * log(RSS/nfit) |
206 | | - fit.ic <- dev + k * edf |
207 | | - if (fit.ic < best.ic) { |
208 | | - best.ic <- fit.ic |
209 | | - p <- c(p1, p2) |
210 | | - } |
211 | | - } |
| 230 | + dev + k * edf |
| 231 | + }) |
| 232 | + p <- which.min(IC) + lag.restrict |
212 | 233 | } |
213 | | - } else { |
214 | | - IC <- sapply((lag.restrict + 1):lag.max[2], function(s) { |
215 | | - fit <- stats::lm.fit(x = cbind(1, |
216 | | - lagY[, 2:(s + 1)], |
217 | | - lagX[, 1:(s - lag.restrict), drop = FALSE]), |
218 | | - y = lagY[, 1]) |
219 | | - # see stats:::extractAIC.lm; but omit the scale option |
220 | | - nfit <- length(fit$residuals) |
221 | | - edf <- nfit - fit$df.residual |
222 | | - RSS <- sum(fit$residuals^2, na.rm = TRUE) # stats:::deviance.lm(fit) |
223 | | - dev <- nfit * log(RSS/nfit) |
224 | | - dev + k * edf |
225 | | - }) |
226 | | - p <- which.min(IC) + lag.restrict |
227 | | - } |
228 | | - } # finish selection of p |
| 234 | + } # finish selection of p |
| 235 | + } |
229 | 236 | if (length(p) == 2) { |
230 | 237 | p.free = TRUE |
231 | 238 | } else { |
|
0 commit comments