Skip to content

Commit 379bdc1

Browse files
committed
Added get_richness() and spatial inputs to find_areas()
1 parent d0df42d commit 379bdc1

File tree

15 files changed

+430
-140
lines changed

15 files changed

+430
-140
lines changed

DESCRIPTION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ Imports:
1818
maps,
1919
reshape2,
2020
segmented,
21+
sf,
2122
stringi,
23+
terra,
2224
tidyr
2325
LazyData: true
2426
Encoding: UTF-8

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export(find_areas)
1111
export(find_land)
1212
export(get_island_areas)
1313
export(get_presence_absence)
14+
export(get_richness)
1415
export(get_sources)
1516
export(remove_continents)
1617
import(mapdata)

R/create_SAR.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ create_SAR <- function(occurrences, npsi = 1, visualize = FALSE) {
3636
checkmate::assertDataFrame(occurrences)
3737
checkmate::assertNumeric(npsi)
3838
checkmate::testSubset(c("specificEpithet", "areas"), names(occurrences))
39+
checkmate::assertLogical(visualize)
3940
# Ensure columns are correct type
4041
checkmate::assertCharacter(occurrences$specificEpithet)
4142
checkmate::assertNumeric(occurrences$areas)

R/find_areas.R

Lines changed: 190 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,24 @@
11
#' Find areas of land masses.
22
#'
3-
#' Reference a dataset of island names and areas to find the areas of the land
4-
#' masses relevant to the taxon of interest.
3+
#' Find the areas of the land masses relevant to the taxon of interest with two
4+
#' options: a database of island names and areas, or a user-provided shapefile.
5+
#'
6+
#' The first method is to reference a built-in dataset of island names
7+
#' and areas to find the areas of the landmasses relevant to the taxon of
8+
#' interest. The user may also decide to input their own custom dataframe
9+
#' including names of relevant land masses and their associated areas to
10+
#' bypass using *ssarp*'s built-in dataset.
11+
#'
12+
#' The second method is to reference a user-supplied shapefile containing
13+
#' spatial information for the landmasses of interest in order to determine
14+
#' their areas.
15+
#'
16+
#' While the word "landmasses" was used heavily in this documentation, users
17+
#' supplying their own custom area dataframe or shapefile are encouraged to
18+
#' use this function in the *ssarp* workflow to create species- and speciation-
19+
#' area relationships for island-like systems such as lakes, fragmented habitat,
20+
#' and mountain peaks.
21+
#'
522
#' @param occs The dataframe that is returned by `ssarp::find_land()`. If using
623
#' a custom occurrence record dataframe, ensure that it has the following
724
#' columns: "acceptedScientificName", "genericName", "specificEpithet",
@@ -13,8 +30,15 @@
1330
#' @param area_custom A dataframe including names of land masses and their
1431
#' associated areas. This dataframe should be provided when the user would like
1532
#' to bypass using the built-in database of island names and areas. Please
16-
#' ensure that the custom dataframe includes the land mass's area in column 3
17-
#' and the name in column 5. (Optional)
33+
#' ensure that the custom dataframe includes the land mass's area in a column
34+
#' called "AREA" and the name in a column called "Name". (Optional)
35+
#' @param shapefile A shapefile (.shp) containing spatial information for
36+
#' the geographic locations of interest. (Optional)
37+
#' @param names If the user would like to restrict which polygons in the
38+
#' shapefile are included in the returned occurrence record dataframe, they can
39+
#' be specified here as a vector. If the user does not provide a vector, all of
40+
#' the non-NA names in the shapefile will be included
41+
#' (as found in shapefile$name). (Optional)
1842
#' @return A dataframe of the species name, island name, and island area
1943
#' @examples
2044
#' # The GBIF key for the Anolis genus is 8782549
@@ -29,7 +53,8 @@
2953
#' areas <- find_areas(occs = occs)
3054
#' @export
3155

32-
find_areas <- function(occs, area_custom = NULL) {
56+
find_areas <- function(occs, area_custom = NULL,
57+
shapefile = NULL, names = NULL) {
3358
# checkmate input verification
3459
checkmate::assertDataFrame(occs)
3560
checkmate::testSubset(
@@ -58,138 +83,180 @@ find_areas <- function(occs, area_custom = NULL) {
5883
# Not checking datasetKey because it is not relevant to the code and can be
5984
# any type, really
6085

61-
# Remove rows where First, Second, and Third are all NA
62-
# Create vector to hold row numbers
63-
minus <- rep(NA, nrow(occs))
64-
# Loop through dataframe
65-
for (i in seq_len(nrow(occs))) {
66-
if (nrow(occs) == 0) {
67-
cli::cli_alert_warning("No data in occurrence record dataframe")
68-
break
69-
}
70-
if (
71-
is.na(occs[i, "Third"]) &&
72-
is.na(occs[i, "Second"]) &&
73-
is.na(occs[i, "First"])
74-
) {
75-
minus[i] <- i
86+
##### NO SHAPEFILE #####
87+
if(is.null(shapefile)){
88+
# Remove any rows where the "specificEpithet" column is NA
89+
occs <- occs[!is.na(occs$specificEpithet),]
90+
91+
# Remove rows where First, Second, and Third are all NA
92+
# Create vector to hold row numbers
93+
minus <- rep(NA, nrow(occs))
94+
# Loop through dataframe
95+
for (i in seq_len(nrow(occs))) {
96+
if (nrow(occs) == 0) {
97+
cli::cli_alert_warning("No data in occurrence record dataframe")
98+
break
99+
}
100+
if (
101+
is.na(occs[i, "Third"]) &&
102+
is.na(occs[i, "Second"]) &&
103+
is.na(occs[i, "First"])
104+
) {
105+
minus[i] <- i
106+
}
76107
}
77-
}
78-
# Remove NAs (from initialization) from row number vector
79-
minus <- minus[!is.na(minus)]
80-
81-
# If all of minus is NA, that means that there are no rows to delete
82-
# Only delete rows when minus is not 0
83-
if (length(minus) != 0) {
84-
occs <- occs[-minus, ]
85-
}
86-
87-
# Add a temporary key-value pair to initialize
88-
island_dict <- Dict::Dict$new(
89-
bloop = 108
90-
)
91-
92-
# For each island name in the current dataframe,
93-
# find the area and add the pair to the dictionary
94-
95-
# First, create an empty list of island names
96-
islands <- list()
97-
98-
# Next, go through the occs dataframe and see if the Third column has a name.
99-
# If yes, add to the island list. If NA, go to the Second column.
100-
# If Second column is NA, go to the First column.
101-
cli::cli_alert_info("Recording island names...")
102-
for (i in seq_len(nrow(occs))) {
103-
if (nrow(occs) == 0) {
104-
cli::cli_alert_warning("No data in occurrence record dataframe")
105-
break
108+
# Remove NAs (from initialization) from row number vector
109+
minus <- minus[!is.na(minus)]
110+
111+
# If all of minus is NA, that means that there are no rows to delete
112+
# Only delete rows when minus is not 0
113+
if (length(minus) != 0) {
114+
occs <- occs[-minus, ]
106115
}
107-
if (!is.na(occs[i, "Third"])) {
108-
islands[i] <- occs[i, "Third"]
109-
} else if (!is.na(occs[i, "Second"])) {
110-
islands[i] <- occs[i, "Second"]
111-
} else if (!is.na(occs[i, "First"])) {
112-
islands[i] <- occs[i, "First"]
116+
117+
# Add a temporary key-value pair to initialize
118+
island_dict <- Dict::Dict$new(
119+
bloop = 108
120+
)
121+
122+
# For each island name in the current dataframe,
123+
# find the area and add the pair to the dictionary
124+
125+
# First, create an empty list of island names
126+
islands <- list()
127+
128+
# Next, go through the occs dataframe and see if the Third column has a name.
129+
# If yes, add to the island list. If NA, go to the Second column.
130+
# If Second column is NA, go to the First column.
131+
cli::cli_alert_info("Recording island names...")
132+
for (i in seq_len(nrow(occs))) {
133+
if (nrow(occs) == 0) {
134+
cli::cli_alert_warning("No data in occurrence record dataframe")
135+
break
136+
}
137+
if (!is.na(occs[i, "Third"])) {
138+
islands[i] <- occs[i, "Third"]
139+
} else if (!is.na(occs[i, "Second"])) {
140+
islands[i] <- occs[i, "Second"]
141+
} else if (!is.na(occs[i, "First"])) {
142+
islands[i] <- occs[i, "First"]
143+
}
113144
}
114-
}
115-
116-
# Next, eliminate duplicate entries in the list
117-
uniq_islands <- unique(islands)
118-
119-
# Next, add the island names as keys and their corresponding areas as values
120-
# If the user did not supply a custom dataframe, get island areas from
121-
# built-in island area dataset
122-
if (is.null(area_custom)) {
123-
area_file <- get_island_areas()
124-
} else {
125-
area_file <- area_custom
126-
}
127-
128-
# Look through the island area file and find the names in uniq_islands list
129-
cli::cli_alert_info("Assembling island dictionary...")
130-
# Initialize vector of island names from island area dataset with
131-
# "Island" appended
132-
area_file_append <- paste0(area_file$Name, " Island")
133-
# Initialize grep statements as NA
134-
grep_res <- grep_res2 <- grep_res3 <- NA
135-
136-
for (i in seq(uniq_islands)) {
137-
# Use grep for exact match in the area database
138-
# [1] picks the first match if the query gets multiple matches
139-
query <- paste0("^", as.character(uniq_islands[i]), "$")
140-
grep_res <- grep(query, area_file$Name)[1]
141-
142-
if (!is.na(grep_res)) {
143-
# If grep found a match, add it to island dictionary
144-
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res, 3]
145+
146+
# Next, eliminate duplicate entries in the list
147+
uniq_islands <- unique(islands)
148+
149+
# Next, add the island names as keys and their corresponding areas as values
150+
# If the user did not supply a custom dataframe, get island areas from
151+
# built-in island area dataset
152+
if (is.null(area_custom)) {
153+
area_file <- get_island_areas()
145154
} else {
146-
# If it doesn't find the name directly from uniq_islands, try adding
147-
# "island" at the end
148-
query <- paste0("^", as.character(uniq_islands[i]), " Island$")
149-
grep_res2 <- grep(query, area_file$Name)[1]
150-
if (!is.na(grep_res2)) {
151-
# If grep found a match, add it to island dictionary
152-
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res2, 3]
153-
}
155+
area_file <- area_custom
154156
}
155-
156-
# If it doesn't find the name from uniq_islands, look in area_file_append
157-
if (is.na(grep_res2)) {
157+
158+
# Look through the island area file and find the names in uniq_islands list
159+
cli::cli_alert_info("Assembling island dictionary...")
160+
# Initialize vector of island names from island area dataset with
161+
# "Island" appended
162+
area_file_append <- paste0(area_file$Name, " Island")
163+
# Initialize grep statements as NA
164+
grep_res <- grep_res2 <- grep_res3 <- NA
165+
166+
for (i in seq(uniq_islands)) {
167+
# Use grep for exact match in the area database
168+
# [1] picks the first match if the query gets multiple matches
158169
query <- paste0("^", as.character(uniq_islands[i]), "$")
159-
grep_res3 <- grep(query, area_file_append)[1]
160-
if (!is.na(grep_res3)) {
170+
grep_res <- grep(query, area_file$Name)[1]
171+
172+
if (!is.na(grep_res)) {
161173
# If grep found a match, add it to island dictionary
162-
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res3, 3]
174+
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res, "AREA"]
175+
} else {
176+
# If it doesn't find the name directly from uniq_islands, try adding
177+
# "island" at the end
178+
query <- paste0("^", as.character(uniq_islands[i]), " Island$")
179+
grep_res2 <- grep(query, area_file$Name)[1]
180+
if (!is.na(grep_res2)) {
181+
# If grep found a match, add it to island dictionary
182+
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res2,
183+
"AREA"]
184+
}
185+
}
186+
187+
# If it doesn't find the name from uniq_islands, look in area_file_append
188+
if (is.na(grep_res2)) {
189+
query <- paste0("^", as.character(uniq_islands[i]), "$")
190+
grep_res3 <- grep(query, area_file_append)[1]
191+
if (!is.na(grep_res3)) {
192+
# If grep found a match, add it to island dictionary
193+
island_dict[as.character(uniq_islands[i])] <- area_file[grep_res3,
194+
"AREA"]
195+
}
163196
}
164197
}
165-
}
166-
167-
# Use the dictionary to add the areas to the final dataframe
168-
cli::cli_alert_info("Adding areas to final dataframe...")
169-
areas <- rep(0, times = nrow(occs))
170-
171-
for (i in seq_len(nrow(occs))) {
172-
if (!is.na(occs[i, "Third"]) && island_dict$has(occs[i, "Third"])) {
173-
areas[i] <- island_dict$get(occs[i, "Third"])
174-
} else if (
175-
!is.na(occs[i, "Second"]) && island_dict$has(occs[i, "Second"])
176-
) {
177-
areas[i] <- island_dict$get(occs[i, "Second"])
178-
} else if (!is.na(occs[i, "First"]) && island_dict$has(occs[i, "First"])) {
179-
areas[i] <- island_dict$get(occs[i, "First"])
180-
} else {
181-
areas[i] <- NA
198+
199+
# Use the dictionary to add the areas to the final dataframe
200+
cli::cli_alert_info("Adding areas to final dataframe...")
201+
areas <- rep(0, times = nrow(occs))
202+
203+
for (i in seq_len(nrow(occs))) {
204+
if (!is.na(occs[i, "Third"]) && island_dict$has(occs[i, "Third"])) {
205+
areas[i] <- island_dict$get(occs[i, "Third"])
206+
} else if (
207+
!is.na(occs[i, "Second"]) && island_dict$has(occs[i, "Second"])
208+
) {
209+
areas[i] <- island_dict$get(occs[i, "Second"])
210+
} else if (!is.na(occs[i, "First"]) && island_dict$has(occs[i, "First"])) {
211+
areas[i] <- island_dict$get(occs[i, "First"])
212+
} else {
213+
areas[i] <- NA
214+
}
182215
}
216+
217+
# Create final dataframe
218+
occs_final <- cbind(occs, areas)
219+
} else {
220+
##### SHAPEFILE #####
221+
checkmate::assertClass(shapefile, "SpatVector")
222+
223+
# Remove any rows where the "specificEpithet" column is NA
224+
occs <- occs[!is.na(occs$specificEpithet),]
225+
226+
# If the user input a "names" vector, use it to subset the SpatVector
227+
if(!is.null(names)){
228+
polygons <- terra::subset(shapefile, shapefile$name %in% names)
229+
} else {
230+
cli::cli_alert_info(
231+
"Using all names in the shapefile, this might extend processing time")
232+
# If the user did not input a "names" vector, use
233+
# the full list of polygon names
234+
# If there are any NAs in shapefile$name, remove them
235+
all_names <- shapefile$name[!is.na(shapefile$name)]
236+
237+
# Still subset the shapefile using these names, since NAs were removed
238+
polygons <- terra::subset(shapefile, shapefile$name %in% all_names)
239+
}
240+
241+
# Assign areas (in m^2) to polygons
242+
polygons$areas <- sf::st_area(sf::st_as_sf(polygons))
243+
244+
# Assign polygons based on the GPS coordinates in occs
245+
poly_dat <- terra::extract(polygons,
246+
data.frame(occs$decimalLongitude,
247+
occs$decimalLatitude))
248+
249+
# Trim to only include important columns
250+
poly_dat <- poly_dat[,c("featurecla", "name", "areas")]
251+
252+
# Add polygon info for each occurrence record to occs
253+
occs_final <- cbind(occs, poly_dat)
183254
}
184-
185-
# Create final dataframe
186-
occs_final <- cbind(occs, areas)
187-
188255
# Remove rows with NA in area column
189256
occs_final <- occs_final[!is.na(occs_final$areas), ]
190-
257+
191258
# Ensure areas are numeric
192259
occs_final$areas <- as.numeric(occs_final$areas)
193-
260+
194261
return(occs_final)
195262
}

0 commit comments

Comments
 (0)