Skip to content

Commit 6c6236a

Browse files
committed
actually commit select_n_knots
1 parent 0c5bc68 commit 6c6236a

File tree

10 files changed

+355
-21
lines changed

10 files changed

+355
-21
lines changed

R/clean_and_resample.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
#' @param dir_out A character string specifying the directory for output files.
2828
#' @param test Logical. TRUE/FALSE. If TRUE, will only run first two resampling
2929
#' tests.
30-
#' @param n_knots Numeric. Default = 500.
30+
#' @param n_knots Numeric. This will be calculated for you based on sample size
31+
#' if there is no input.
3132
#' @param model_type String. Default = "wrapper_sdmtmb", but can be any preset
3233
#' wrapper_*() function or a premade home built function.
3334
#' @param bio A data frame containing the biological data, if applicable. NULL

R/resample_tests.R

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,43 @@
3737
#' }
3838
#' @examples
3939
#' \dontrun{
40-
#' resample_tests() # TO DO: NEED EXAMPLE OF HOW TO USE
40+
#' catch <- surveyresamplr::noaa_nwfsc_catch
41+
#' grid_yrs <- sdmTMB::replicate_df(
42+
#' dat = surveyresamplr::noaa_nwfsc_catch, time_name = "year",
43+
#' time_values = unique(catch$year)
44+
#' )
45+
#' spp_list <- data.frame(
46+
#' srvy = "CA",
47+
#' common_name = "arrowtooth flounder",
48+
#' filter_lat_gt = 34,
49+
#' filter_lat_lt = NA,
50+
#' filter_depth = NA,
51+
#' model_fn = "total_catch_wt_kg ~ 0 + factor(year) + pass",
52+
#' model_family = "delta_gamma",
53+
#' model_anisotropy = TRUE,
54+
#' model_spatiotemporal = "iid, iid"
55+
#' )
56+
#'
57+
#' spp_dfs <- cleanup_by_species(
58+
#' spp_info = spp_list,
59+
#' catch,
60+
#' seq_from = 0.1,
61+
#' seq_to = 1,
62+
#' seq_by = 0.1,
63+
#' tot_dataframes = 91,
64+
#' replicate_num = 10,
65+
#' )
66+
#'
67+
#' resample_tests(
68+
#' spp_dfs = spp_dfs,
69+
#' spp_info = spp_list,
70+
#' grid_yrs = grid_yrs,
71+
#' dir_out = dir_out,
72+
#' test = TRUE,
73+
#' model_type = "wrapper_sdmtmb"
74+
#' )
4175
#' }
76+
#'
4277
resample_tests <- function(spp_dfs, spp_info, grid_yrs, dir_out, test = FALSE,
4378
parallel = FALSE, n_knots = 500,
4479
model_type = "wrapper_sdmtmb") {
@@ -136,14 +171,9 @@ resample_tests <- function(spp_dfs, spp_info, grid_yrs, dir_out, test = FALSE,
136171
if (parallel) {
137172
n_cores <- future::availableCores()
138173
n_workers <- round(n_cores * 0.75)
139-
if (Sys.info()['sysname'] == 'Windows') {
140-
future::plan(future::multisession, workers = n_workers)
141-
message("...Running in parallel with multisession")
142-
} else {
143-
# Use multicore for Linux/macOS for better performance
144-
future::plan(future::multicore, workers = n_workers)
145-
message("...Running in parallel with multicore")
146-
}
174+
175+
future::plan(future::multisession, workers = n_workers)
176+
147177
results <- furrr::future_map(
148178
seq_along(spp_files),
149179
run_parallel_models,
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Test for clean_and_resample function in surveyresamplr
2+
3+
test_that("clean_and_resample runs without error and processes realistic bio and catch data", {
4+
# Minimal spp_info row
5+
spp_info <- data.frame(
6+
srvy = "CA",
7+
common_name = "arrowtooth flounder",
8+
filter_lat_gt = 34,
9+
filter_lat_lt = NA,
10+
filter_depth = NA,
11+
model_fn = "total_catch_wt_kg ~ 0 + factor(year) + pass",
12+
model_family = "delta_gamma",
13+
model_anisotropy = TRUE,
14+
model_spatiotemporal = "iid, iid",
15+
stringsAsFactors = FALSE
16+
)
17+
18+
# Usable catch data matching noaa_nwfsc_catch structure
19+
catch <- data.frame(
20+
trawlid = 1:3,
21+
common_name = rep("arrowtooth flounder", 3),
22+
longitude_dd = c(-124.8186, -124.8533, -125.3244),
23+
latitude_dd = c(46.71917, 47.64056, 48.32917),
24+
year = 2001:2003,
25+
pass = c(1, 2, 1),
26+
area_swept_ha = c(1.2, 1.5, 1.3),
27+
total_catch_numbers = c(16, 22, 50),
28+
total_catch_wt_kg = c(28, 40, 60),
29+
depth_m = c(270, 130, 115),
30+
srvy = rep("CA", 3),
31+
stringsAsFactors = FALSE
32+
)
33+
34+
# grid_yrs with required columns
35+
grid_yrs <- data.frame(
36+
longitude_dd = rep(c(-124.8186, -124.8533, -125.3244), each = 3),
37+
latitude_dd = rep(c(46.71917, 47.64056, 48.32917), times = 3),
38+
pass = rep(c(1, 2, 1), times = 3),
39+
depth_m = rep(c(270, 130, 115), times = 3),
40+
area_km2 = runif(9, 2, 3), # random area between 2 and 3 km2
41+
stringsAsFactors = FALSE
42+
)
43+
44+
# Usable bio data matching noaa_nwfsc_bio structure
45+
bio <- data.frame(
46+
trawlid = 1:3,
47+
common_name = rep("arrowtooth flounder", 3),
48+
longitude_dd = c(-124.9829, -124.8533, -125.0133),
49+
latitude_dd = c(47.01417, 47.72861, 47.84111),
50+
year = 2001:2003,
51+
pass = c(1, 2, 1),
52+
sex = c("F", "M", "F"),
53+
length_cm = c(56, 35, 31),
54+
age = c(11, 3, 4),
55+
depth_m = c(170, 100, 120),
56+
project = rep("NWFSC", 3),
57+
srvy = rep("CA", 3),
58+
stringsAsFactors = FALSE
59+
)
60+
61+
# Temporary output directory
62+
dir_out <- withr::local_tempdir()
63+
64+
# Mock dependencies
65+
stub(clean_and_resample, "cleanup_by_species", function(...) {
66+
list(
67+
data.frame(source = "rep1", trawlid = 1:3),
68+
data.frame(source = "rep2", trawlid = 4:6)
69+
)
70+
})
71+
stub(clean_and_resample, "resample_tests", function(...) {
72+
TRUE
73+
})
74+
75+
# Run function and check that it doesn't error
76+
expect_no_error(
77+
clean_and_resample(
78+
spp_info = spp_info,
79+
catch = catch,
80+
seq_from = 0.1,
81+
seq_to = 1,
82+
seq_by = 0.1,
83+
tot_dataframes = 3,
84+
replicate_num = 2,
85+
grid_yrs = grid_yrs,
86+
dir_out = dir_out,
87+
bio = bio
88+
)
89+
)
90+
91+
# Optionally, check that bio.csv was written (if bio is provided)
92+
bio_file <- file.path(dir_out, "CA_arrowtooth_flounder", "bio.csv")
93+
expect_true(file.exists(bio_file))
94+
95+
# Check that the written bio.csv contains expected column names
96+
out_bio <- read.csv(bio_file)
97+
expect_true(all(c("trawlid", "common_name", "latitude_dd", "depth_m", "source") %in% names(out_bio)))
98+
})
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Test for cleanup_by_species function in surveyresamplr
2+
3+
test_that("cleanup_by_species returns expected structure and filters correctly", {
4+
# Minimal spp_info row (arrowtooth flounder, filter latitude > 34)
5+
spp_info <- data.frame(
6+
srvy = "CA",
7+
common_name = "arrowtooth flounder",
8+
filter_lat_gt = 34,
9+
filter_lat_lt = NA,
10+
filter_depth = NA,
11+
model_fn = "total_catch_wt_kg ~ 0 + factor(year) + pass",
12+
model_family = "delta_gamma",
13+
model_anisotropy = TRUE,
14+
model_spatiotemporal = "iid, iid",
15+
stringsAsFactors = FALSE
16+
)
17+
18+
# Minimal catch data matching required structure
19+
catch <- data.frame(
20+
srvy = rep("CA", 5),
21+
trawlid = 1:5,
22+
common_name = c("arrowtooth flounder", "arrowtooth flounder", "arrowtooth flounder", "other fish", "arrowtooth flounder"),
23+
total_catch_numbers = c(100, 120, 110, 50, 130),
24+
total_catch_wt_kg = c(10, 20, 30, 5, 40),
25+
latitude_dd = c(35, 36, 33, 37, 38),
26+
longitude_dd = c(-124.8, -124.9, -124.7, -124.8, -125.3),
27+
year = c(2001, 2002, 2001, 2002, 2001),
28+
pass = c(1, 2, 1, 2, 1),
29+
depth_m = c(270, 130, 115, 200, 150),
30+
area_swept_ha = c(1.2, 1.5, 1.3, 1.4, 1.6),
31+
stringsAsFactors = FALSE
32+
)
33+
34+
# Run function with reduced effort (for test speed)
35+
result <- cleanup_by_species(
36+
catch = catch,
37+
spp_info = spp_info,
38+
seq_from = 0.1,
39+
seq_to = 0.3,
40+
seq_by = 0.1,
41+
tot_dataframes = 11,
42+
replicate_num = 4
43+
)
44+
45+
# Check output is a named list of data frames
46+
expect_type(result, "list")
47+
expect_true(all(vapply(result, is.data.frame, logical(1))))
48+
# Check output data frames have at least the columns expected from catch
49+
expect_true(all(c("trawlid", "common_name", "latitude_dd", "depth_m") %in% names(result[[1]])))
50+
51+
# Check filter works (should only include common_name == "arrowtooth flounder" and latitude_dd > 34)
52+
all_lat <- unlist(lapply(result, function(df) df$latitude_dd))
53+
all_names <- unlist(lapply(result, function(df) df$common_name))
54+
expect_true(all(all_lat > 34))
55+
expect_true(all(all_names == "arrowtooth flounder"))
56+
57+
# Check that list names are present
58+
expect_true(!is.null(names(result)))
59+
})
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Mock species SDM wrapper function for testing
2+
wrapper_sdmtmb <- function(x, y, z, dir_spp, spp_info, n_knots) {
3+
# Return a list with mock components
4+
list(
5+
fit = list(), # Empty object, to be handled by sdmTMB::tidy and sanity
6+
index = data.frame(year = 1:3, index = c(1,2,3))
7+
)
8+
}
9+
10+
# Mock sdmTMB::tidy and sdmTMB::sanity
11+
sdmTMB::tidy <- function(fit, conf.int = FALSE, effects = NULL) {
12+
# Return a simple tidy data frame
13+
data.frame(term = "intercept", estimate = 1.23)
14+
}
15+
sdmTMB::sanity <- function(fit) {
16+
# Return a simple sanity data frame
17+
data.frame(sane = TRUE)
18+
}
19+
20+
# Setup test data
21+
result <- tibble::tibble(
22+
source = c("0.1_2", "0.1_4", "0.1_4", "0.2_2", "0.2_3", "0.2_4", "0.2_4", "0.3_2", "0.3_3"),
23+
trawlid = c(5, 1, 2, 5, 5, 1, 2, 5, 5),
24+
RandomAssignment = rep(1, 9),
25+
srvy = rep("CA", 9),
26+
common_name = rep("arrowtooth flounder", 9),
27+
total_catch_numbers = c(130, 100, 120, 130, 130, 100, 120, 130, 130),
28+
total_catch_wt_kg = c(40, 10, 20, 40, 40, 10, 20, 40, 40),
29+
latitude_dd = c(38, 35, 36, 38, 38, 35, 36, 38, 38),
30+
longitude_dd = c(-125.3, -124.8, -124.9, -125.3, -125.3, -124.8, -124.9, -125.3, -125.3),
31+
year = c(2001, 2001, 2002, 2001, 2001, 2001, 2002, 2001, 2001),
32+
pass = c(1, 1, 2, 1, 1, 1, 2, 1, 1),
33+
depth_m = c(150, 270, 130, 150, 150, 270, 130, 150, 150),
34+
area_swept_ha = c(1.6, 1.2, 1.5, 1.6, 1.6, 1.2, 1.5, 1.6, 1.6)
35+
)
36+
spp_dfs <- split(result, result$source)
37+
38+
39+
spp_info <- data.frame(
40+
srvy = "CA",
41+
common_name = "arrowtooth flounder",
42+
filter_lat_gt = 34,
43+
filter_lat_lt = NA,
44+
filter_depth = NA,
45+
model_fn = "total_catch_wt_kg ~ 0 + factor(year) + pass",
46+
model_family = "delta_gamma",
47+
model_anisotropy = TRUE,
48+
model_spatiotemporal = "iid, iid",
49+
stringsAsFactors = FALSE
50+
)
51+
52+
grid_yrs <- data.frame(
53+
longitude_dd = c(-124.81, -124.85, -125.32, -125.02, -124.55, -123.99, -125.76, -125.46, -124.36, -124.08),
54+
latitude_dd = c(46.85, 47.60, 48.25, 47.81, 46.42, 45.59, 48.08, 47.94, 43.75, 44.67),
55+
pass = rep(0, 10),
56+
depth_m = c(-159, -112, -97, -30, -191, -35, -655, -335, -110, -11),
57+
area_km2 = rep(0, 10),
58+
srvy = rep("CA", 10)
59+
)
60+
61+
dir_out <- tempdir()
62+
63+
test_that("resample_tests runs and creates output files", {
64+
# Remove any prior test directory
65+
test_dir <- paste0(dir_out, paste0(spp_info$srvy, "_Test_Species/"))
66+
if (dir.exists(test_dir)) unlink(test_dir, recursive = TRUE)
67+
68+
expect_no_error({
69+
resample_tests(
70+
spp_dfs = spp_dfs,
71+
spp_info = spp_info,
72+
grid_yrs = grid_yrs,
73+
dir_out = dir_out,
74+
test = TRUE,
75+
parallel = FALSE,
76+
n_knots = NULL,
77+
model_type = "wrapper_sdmtmb"
78+
)
79+
})
80+
81+
# Check output files exist
82+
expect_true(file.exists(file.path(test_dir, "fit_df.csv")))
83+
expect_true(file.exists(file.path(test_dir, "fit_pars.csv")))
84+
expect_true(file.exists(file.path(test_dir, "fit_check.csv")))
85+
expect_true(file.exists(file.path(test_dir, "index.csv")))
86+
87+
# Check that parquet files exist
88+
expect_true(file.exists(file.path(test_dir, "df_1.parquet")))
89+
expect_true(file.exists(file.path(test_dir, "df_2.parquet")))
90+
91+
# Read and check CSV contents
92+
fit_df <- read.csv(file.path(test_dir, "fit_df.csv"))
93+
expect_true("estimate" %in% names(fit_df))
94+
fit_index <- read.csv(file.path(test_dir, "index.csv"))
95+
expect_true(all(c("year", "index") %in% names(fit_index)))
96+
})
97+
98+
test_that("select_n_knots returns expected values", {
99+
expect_equal(select_n_knots(500), 50)
100+
expect_equal(select_n_knots(3000), 100)
101+
expect_equal(select_n_knots(9000), 200)
102+
expect_equal(select_n_knots(20000), 500)
103+
expect_equal(select_n_knots(100000), 1000)
104+
})
105+

vignettes/a-simple-example.Rmd

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,16 @@ The `purrr::map` function is used to apply the `clean_and_resample` function to
245245
The number you input for `n_knots` can make or break your model. We have more details on considerations when choosing `n_knots` or allowing the function to select the number of knots for you in the [Importance of `n_knots` in `{sdmTMB}` Models section](#importance-of-sdmtmb-models).
246246
:::
247247

248-
```{r run-models, eval = FALSE}
248+
You can run this in testing mode
249+
```{r}
250+
clean_and_resample(spp_info = spp_list,
251+
catch,
252+
seq_from, seq_to, seq_by,
253+
tot_dataframes, replicate_num,
254+
grid_yrs, dir_out, test = TRUE, n_knots = 300)
255+
```
256+
257+
```{r run-models}
249258
start.time <- Sys.time()
250259
purrr::map(
251260
seq_len(nrow(spp_list)),
@@ -256,19 +265,11 @@ purrr::map(
256265
n_knots = 300
257266
)
258267
)
259-
write.csv(
260-
x = data.frame(
261-
time = as.numeric(Sys.time() - start.time),
262-
units = units(Sys.time() - start.time)
263-
),
264-
file = paste0(dir_final, srvy, "_simple_time.csv")
265-
)
266268
```
267269

268270
Don't freak out if this takes a long time. It should take the following amount of time:
269-
```{r time}
270-
a <- read.csv(file = paste0(dir_final, srvy, "_simple_time.csv"))
271-
print(paste0("Completed in: ", round(a$time, 2), " ", a$units))
271+
```{r, echo = FALSE}
272+
print(paste0("Completed in: ", round(as.numeric(Sys.time() - start.time), 2), " ", units(Sys.time() - start.time)))
272273
```
273274

274275
```{r sink-results-backup, include = FALSE}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"srvy","common_name","model_fn","model_family","model_anisotropy","model_spatiotemporal","effort","hessian_ok","eigen_values_ok","nlminb_ok","range_ok","gradients_ok","se_magnitude_ok","se_na_ok","sigmas_ok","all_ok"
2+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_1",FALSE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE
3+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_2",FALSE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE
4+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_3",FALSE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
"srvy","common_name","model_fn","model_family","model_anisotropy","model_spatiotemporal","effort","term","estimate","std.error","conf.low","conf.high"
2+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_1","factor(year)2021",20.3035148497297,1020.3623946318,-1979.57002980765,2020.17705950711
3+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_1","factor(year)2022",21.4673087246129,2860.24471327961,-5584.50931627451,5627.44393372374
4+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_1","factor(year)2023",21.006938931454,2166.32132286837,-4224.90483283172,4266.91871069463
5+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_1","factor(year)2024",20.7459415141075,1707.14795575973,-3325.20256805615,3366.69445108436
6+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_2","factor(year)2021",21.7210323464771,2442.61938463255,-4765.72498947272,4809.16705416567
7+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_2","factor(year)2022",21.6242224589619,NA,NA,NA
8+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_2","factor(year)2023",21.5654413409925,NA,NA,NA
9+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_2","factor(year)2024",21.4913033210022,1669.97594763063,-3251.60140908318,3294.58401572518
10+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_3","factor(year)2021",20.4135309519096,1679.53172014375,-3271.40815142244,3312.23521332626
11+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_3","factor(year)2022",20.4193816640038,1170.4479263295,-2273.61639972141,2314.45516304942
12+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_3","factor(year)2023",20.4135309512272,NA,NA,NA
13+
"EBS","walleye pollock","total_catch_wt_kg ~ 0 + factor(year)","delta_gamma",TRUE,"iid, iid","05_3","factor(year)2024",20.3390234786106,1783.02587428651,-3474.327473626,3515.00552058322

0 commit comments

Comments
 (0)