Renamed locality columns to "first", "second", and "third"

kmartinet · kmartinet · commit d5c051f73094 · 2025-09-05T13:14:19.000-07:00
diff --git a/R/find_areas.R b/R/find_areas.R
@@ -22,7 +22,7 @@
 #' @param occs The dataframe that is returned by `ssarp::find_land()`. If using
 #' a custom occurrence record dataframe, ensure that it has the following
 #' columns: "genericName", "specificEpithet",
-#' "decimalLongitude", "decimalLatitude", "First", "Second", "Third",
+#' "decimalLongitude", "decimalLatitude", "first", "second", "third",
 #' "datasetKey". The "datasetKey" column is important for GBIF records and
 #' identifies the dataset to which the occurrence record belongs. Custom
 #' dataframes without this style of data organization should fill the column
@@ -65,9 +65,9 @@ find_areas <- function(
     c(
       "genericName",
       "specificEpithet",
-      "First",
-      "Second",
-      "Third",
+      "first",
+      "second",
+      "third",
       "datasetKey"
     ),
     names(occs)
@@ -89,9 +89,9 @@ find_areas <- function(
   # Ensure other columns are correct type
   checkmate::assertCharacter(occs$genericName)
   checkmate::assertCharacter(occs$specificEpithet)
-  checkmate::assertCharacter(occs$First)
-  checkmate::assertCharacter(occs$Second)
-  checkmate::assertCharacter(occs$Third)
+  checkmate::assertCharacter(occs$first)
+  checkmate::assertCharacter(occs$second)
+  checkmate::assertCharacter(occs$third)
   # Not checking datasetKey because it is not relevant to the code and can be
   #  any type, really
 
@@ -100,7 +100,7 @@ find_areas <- function(
     # Remove any rows where the "specificEpithet" column is NA
     occs <- occs[!is.na(occs$specificEpithet), ]
 
-    # Remove rows where First, Second, and Third are all NA
+    # Remove rows where first, second, and third are all NA
     # Create vector to hold row numbers
     minus <- rep(NA, nrow(occs))
     # Loop through dataframe
@@ -112,9 +112,9 @@ find_areas <- function(
         break
       }
       if (
-        is.na(occs[i, "Third"]) &&
-          is.na(occs[i, "Second"]) &&
-          is.na(occs[i, "First"])
+        is.na(occs[i, "third"]) &&
+          is.na(occs[i, "second"]) &&
+          is.na(occs[i, "first"])
       ) {
         minus[i] <- i
       }
@@ -139,9 +139,10 @@ find_areas <- function(
     # First, create an empty list of island names
     islands <- list()
 
-    # Next, go through the occs dataframe and see if the Third column has a name.
-    # If yes, add to the island list. If NA, go to the Second column.
-    # If Second column is NA, go to the First column.
+    # Next, go through the occs dataframe and see if the 'third' column 
+    #  has a name.
+    # If yes, add to the island list. If NA, go to the 'second' column.
+    # If 'second' column is NA, go to the 'first' column.
     if (!getOption("ssarp.silent", FALSE)) {
       cli::cli_alert_info("Recording island names...")
     }
@@ -152,12 +153,12 @@ find_areas <- function(
         }
         break
       }
-      if (!is.na(occs[i, "Third"])) {
-        islands[i] <- occs[i, "Third"]
-      } else if (!is.na(occs[i, "Second"])) {
-        islands[i] <- occs[i, "Second"]
-      } else if (!is.na(occs[i, "First"])) {
-        islands[i] <- occs[i, "First"]
+      if (!is.na(occs[i, "third"])) {
+        islands[i] <- occs[i, "third"]
+      } else if (!is.na(occs[i, "second"])) {
+        islands[i] <- occs[i, "second"]
+      } else if (!is.na(occs[i, "first"])) {
+        islands[i] <- occs[i, "first"]
       }
     }
 
@@ -230,16 +231,16 @@ find_areas <- function(
     areas <- rep(0, times = nrow(occs))
 
     for (i in seq_len(nrow(occs))) {
-      if (!is.na(occs[i, "Third"]) && island_dict$has(occs[i, "Third"])) {
-        areas[i] <- island_dict$get(occs[i, "Third"])
+      if (!is.na(occs[i, "third"]) && island_dict$has(occs[i, "third"])) {
+        areas[i] <- island_dict$get(occs[i, "third"])
       } else if (
-        !is.na(occs[i, "Second"]) && island_dict$has(occs[i, "Second"])
+        !is.na(occs[i, "second"]) && island_dict$has(occs[i, "second"])
       ) {
-        areas[i] <- island_dict$get(occs[i, "Second"])
+        areas[i] <- island_dict$get(occs[i, "second"])
       } else if (
-        !is.na(occs[i, "First"]) && island_dict$has(occs[i, "First"])
+        !is.na(occs[i, "first"]) && island_dict$has(occs[i, "first"])
       ) {
-        areas[i] <- island_dict$get(occs[i, "First"])
+        areas[i] <- island_dict$get(occs[i, "first"])
       } else {
         areas[i] <- NA
       }
diff --git a/R/find_land.R b/R/find_land.R
@@ -15,13 +15,13 @@
 #' standard location for island names in its returned information, so using it
 #' will likely require the returned dataframe to be cleaned by the user.
 #' @return A dataframe of the species name, longitude, latitude, and three parts
-#' of occurrence information. "First" is the name used to describe the largest
-#' possible area of land where the occurrence point is found. "Second" is the
+#' of occurrence information. "first" is the name used to describe the largest
+#' possible area of land where the occurrence point is found. "second" is the
 #' name used to describe the second-largest possible area of land that
-#' corresponds with the occurrence point. "Third" is the most specific area of
+#' corresponds with the occurrence point. "third" is the most specific area of
 #' land that corresponds with the occurrence point. Functions later in the ssarp
-#' pipeline default to checking whether "Third" has an entry, then look at
-#' "Second," and then "First."
+#' pipeline default to checking whether "third" has an entry, then look at
+#' "second," and then "first."
 #' @examples
 #' # The GBIF key for the Anolis genus is 8782549
 #' # Read in example dataset filtered from:
@@ -101,17 +101,17 @@ find_land <- function(occurrences, fillgaps = FALSE) {
   # But sometimes there are three...
   suppressWarnings(
     occs <- occs |>
-      tidyr::separate(where2, c("First", "Second", "Third"), sep = ":")
+      tidyr::separate(where2, c("first", "second", "third"), sep = ":")
   )
   colnames(occs) <- c(
     "acceptedScientificName",
     "genericName",
     "specificEpithet",
     "decimalLongitude",
     "decimalLatitude",
-    "First",
-    "Second",
-    "Third",
+    "first",
+    "second",
+    "third",
     "datasetKey"
   )
 
@@ -128,7 +128,7 @@ find_land <- function(occurrences, fillgaps = FALSE) {
         }
         break
       }
-      if (is.na(occs[i, "First"])) {
+      if (is.na(occs[i, "first"])) {
         # Get lon and lat
         longitude <- occs[i, "decimalLongitude"]
         latitude <- occs[i, "decimalLatitude"]
diff --git a/R/find_pam_areas.R b/R/find_pam_areas.R
@@ -77,8 +77,8 @@ find_pam_areas <- function(pam, area_custom = NULL) {
       # Fix colnames
       colnames(sp_df) <- c("genericName", "specificEpithet")
 
-      # Add island to dataframe (call it "Third" for ssarp::find_areas())
-      sp_df$Third <- island
+      # Add island to dataframe (call it "third" for ssarp::find_areas())
+      sp_df$third <- island
 
       # Add this small dataframe to the list
       dat[[length(dat) + 1]] <- sp_df
@@ -88,12 +88,12 @@ find_pam_areas <- function(pam, area_custom = NULL) {
   # Turn dat into a dataframe
   occs <- do.call(rbind.data.frame, dat)
 
-  # Add "First" and "Second" columns so it can be used with ssarp::find_areas()
-  occs$First <- NA
-  occs$Second <- NA
+  # Add "first" and "second" columns so it can be used with ssarp::find_areas()
+  occs$first <- NA
+  occs$second <- NA
   # They also need to be characters
-  occs$First <- as.character(occs$First)
-  occs$Second <- as.character(occs$Second)
+  occs$first <- as.character(occs$first)
+  occs$second <- as.character(occs$second)
 
   return(find_areas(occs, area_custom))
 }
diff --git a/R/get_presence_absence.R b/R/get_presence_absence.R
@@ -8,11 +8,11 @@
 #' - "areas" containing the areas associated with the land masses of interest
 #' - "specificEpithet" containing the names of the species living on those
 #' islands
-#' - "First" containing locality information. In the ssarp workflow, this
+#' - "first" containing locality information. In the ssarp workflow, this
 #' column contains the country name
-#' - "Second" containing locality information. In the ssarp workflow, this
+#' - "second" containing locality information. In the ssarp workflow, this
 #' column contains a province or island name
-#' - "Third" containing locality information. In the ssarp workflow, this
+#' - "third" containing locality information. In the ssarp workflow, this
 #' column contains the island name if the 7th column does not contain the
 #' island name
 #' @return A dataframe with a row for each island in the given occurrence
@@ -39,15 +39,15 @@ get_presence_absence <- function(occs) {
   # Checkmate input validation
   checkmate::assertDataFrame(occs)
   checkmate::testSubset(
-    c("specificEpithet", "areas", "First", "Second", "Third"),
+    c("specificEpithet", "areas", "first", "second", "third"),
     names(occs)
   )
   # Ensure columns are correct type
   checkmate::assertCharacter(occs$specificEpithet)
   checkmate::assertNumeric(occs$areas)
-  checkmate::assertCharacter(occs$First)
-  checkmate::assertCharacter(occs$Second)
-  checkmate::assertCharacter(occs$Third)
+  checkmate::assertCharacter(occs$first)
+  checkmate::assertCharacter(occs$second)
+  checkmate::assertCharacter(occs$third)
 
   # Create a dataframe that counts how many records there are for each species
   #  in each island
@@ -78,7 +78,7 @@ get_presence_absence <- function(occs) {
       area_names,
       occs[
         which(occs$areas == new_occs[i, 1])[1],
-        c("First", "Second", "Third")
+        c("first", "second", "third")
       ]
     )
   }
@@ -87,7 +87,7 @@ get_presence_absence <- function(occs) {
   final_occs <- cbind(area_names, new_occs)
 
   # Rownames won't make sense, so reset them to 1 to n
-  rownames(final_occs) <- c(1:length(final_occs$First))
+  rownames(final_occs) <- c(1:length(final_occs$first))
 
   return(final_occs)
 }
diff --git a/inst/extdata/SSARP_Example_Dat.csv b/inst/extdata/SSARP_Example_Dat.csv
@@ -1,4 +1,4 @@
-﻿acceptedScientificName,genericName,specificEpithet,decimalLongitude,decimalLatitude,First,Second,Third,datasetKey,areas
+﻿acceptedScientificName,genericName,specificEpithet,decimalLongitude,decimalLatitude,first,second,third,datasetKey,areas
 "Anolis ahli Barbour, 1925",Anolis,ahli,-80.072502,21.984518,Cuba,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,1.22E+11
 "Anolis alayoni Estrada & Hedges, 1995",Anolis,alayoni,-74.626318,20.412664,Cuba,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,1.22E+11
 "Anolis aliniger Mertens, 1939",Anolis,aliniger,-70.590035,19.104985,Dominican Republic,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,83104562500
diff --git a/man/find_areas.Rd b/man/find_areas.Rd
diff --git a/man/find_land.Rd b/man/find_land.Rd
diff --git a/man/get_presence_absence.Rd b/man/get_presence_absence.Rd
diff --git a/tests/testthat/test-find_areas.R b/tests/testthat/test-find_areas.R
@@ -1,8 +1,8 @@
 # Create test dataframe for find_areas occs input
 occs <- as.data.frame(matrix(ncol = 9, nrow = 2))
 colnames(occs) <- c("acceptedScientificName", "genericName", "specificEpithet", 
-                    "decimalLongitude", "decimalLatitude", "First", 
-                    "Second", "Third", "datasetKey")
+                    "decimalLongitude", "decimalLatitude", "first", 
+                    "second", "third", "datasetKey")
 
 # Test dataframe for find_areas occs input with values
 occs_vals <- occs
@@ -20,8 +20,8 @@ colnames(custom_area) <- c("Name", "AREA")
 # Test matrix for find_areas occs input
 occ_mat <- matrix(ncol = 9, nrow = 2)
 colnames(occs) <- c("acceptedScientificName", "genericName", "specificEpithet", 
-                    "decimalLongitude", "decimalLatitude", "First", 
-                    "Second", "Third", "datasetKey")
+                    "decimalLongitude", "decimalLatitude", "first", 
+                    "second", "third", "datasetKey")
 occ_mat[1,] <- c("Anolis first", "Anolis", "first", -81.948509, 28.028463, 
                  "USA", "Florida", "Lakeland", 1)
 occ_mat[2,] <- c("Anolis second", "Anolis", "second", -81.949353, 28.028047, 
@@ -44,9 +44,9 @@ occ_types$genericName <- as.factor(occ_types$genericName)
 occ_types$specificEpithet <- as.factor(occ_types$specificEpithet)
 occ_types$decimalLongitude <- as.character(occ_types$decimalLongitude)
 occ_types$decimalLatitude <- as.character(occ_types$decimalLatitude)
-occ_types$First <- as.factor(occ_types$First)
-occ_types$Second <- as.factor(occ_types$Second)
-occ_types$Third <- as.factor(occ_types$Third)
+occ_types$first <- as.factor(occ_types$first)
+occ_types$second <- as.factor(occ_types$second)
+occ_types$third <- as.factor(occ_types$third)
 
 ### Spatial inputs ###
 # Create test SpatVector with WKT polygon
diff --git a/tests/testthat/test-get_presence_absence.R b/tests/testthat/test-get_presence_absence.R
@@ -1,8 +1,8 @@
 # Create test dataframe for occs input
 occs <- as.data.frame(matrix(ncol = 10, nrow = 2))
 colnames(occs) <- c("acceptedScientificName", "genericName", "specificEpithet", 
-                    "decimalLongitude", "decimalLatitude", "First", 
-                    "Second", "Third", "datasetKey", "areas")
+                    "decimalLongitude", "decimalLatitude", "first", 
+                    "second", "third", "datasetKey", "areas")
 
 # Test dataframe for find_areas occs input with values
 occs_vals <- occs
@@ -17,8 +17,8 @@ occs_vals[,10] <- as.numeric(occs_vals[,10])
 # Test matrix for occs input
 occ_mat <- matrix(ncol = 10, nrow = 2)
 colnames(occs) <- c("acceptedScientificName", "genericName", "specificEpithet", 
-                    "decimalLongitude", "decimalLatitude", "First", 
-                    "Second", "Third", "datasetKey", "areas")
+                    "decimalLongitude", "decimalLatitude", "first", 
+                    "second", "third", "datasetKey", "areas")
 occ_mat[1,] <- c("Anolis first_sp", "Anolis", "first_sp", -81.948509, 
                  28.028463, "USA", "Florida", "Lakeland", 1, 100)
 occ_mat[2,] <- c("Anolis second_sp", "Anolis", "second_sp", -81.949353, 
@@ -32,9 +32,9 @@ colnames(occ_name) <- c(1:10)
 occ_types <- occs_vals
 occ_types$specificEpithet <- as.factor(occ_types$specificEpithet)
 occ_types$areas <- as.character(occ_types$areas)
-occ_types$First <- as.factor(occ_types$First)
-occ_types$Second <- as.factor(occ_types$Second)
-occ_types$Third <- as.factor(occ_types$Third)
+occ_types$first <- as.factor(occ_types$first)
+occ_types$second <- as.factor(occ_types$second)
+occ_types$third <- as.factor(occ_types$third)
 
 ########
 test_that("Inputting a matrix instead of a dataframe for occurrence records 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-acceptedScientificName,genericName,specificEpithet,decimalLongitude,decimalLatitude,First,Second,Third,datasetKey,areas`
	`1`	`+acceptedScientificName,genericName,specificEpithet,decimalLongitude,decimalLatitude,first,second,third,datasetKey,areas`
`2`	`2`	`"Anolis ahli Barbour, 1925",Anolis,ahli,-80.072502,21.984518,Cuba,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,1.22E+11`
`3`	`3`	`"Anolis alayoni Estrada & Hedges, 1995",Anolis,alayoni,-74.626318,20.412664,Cuba,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,1.22E+11`
`4`	`4`	`"Anolis aliniger Mertens, 1939",Anolis,aliniger,-70.590035,19.104985,Dominican Republic,NA,NA,50c9509d-22c7-4a22-a47d-8c48425ef4a7,83104562500`