LimaRAF
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions b/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 42 additions & 16 deletions b/‎NEWS.md‎
Lines changed: 42 additions & 16 deletions
diff --git a/‎R/accessory_geo.R‎
Lines changed: 15 additions & 7 deletions b/‎R/accessory_geo.R‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎R/checkBorders.R‎
Lines changed: 4 additions & 0 deletions b/‎R/checkBorders.R‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/checkCoord.R‎
Lines changed: 25 additions & 11 deletions b/‎R/checkCoord.R‎
Lines changed: 25 additions & 11 deletions
diff --git a/‎R/checkInverted.R‎
Lines changed: 5 additions & 3 deletions b/‎R/checkInverted.R‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎R/checkOut.R‎
Lines changed: 7 additions & 5 deletions b/‎R/checkOut.R‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎R/checkShore.R‎
Lines changed: 3 additions & 1 deletion b/‎R/checkShore.R‎
Lines changed: 3 additions & 1 deletion
@@ -51,6 +51,7 @@ Suggests:
     remotes,
     rmarkdown,
     sp,
+    s2,
     stats,
     testthat (>= 3.0.0),
     utils
 
@@ -1,4 +1,8 @@
-# plantR 0.1.4
+# plantR NEWS
+
+<br/>
+
+## version 0.1.4
 
 * Fixing an issue related to non-NA type specifications in `validateTax()` and
 `checkList()` used to validate species identifications and detect type
@@ -7,9 +11,12 @@ specimens.
 * Fixing a small issue related to non-preserved specimens observation in
 `validateTax()` to assign confidence levels to species identifications. 
 
-* Inclusion of the (simple) internal function `plotCoord()` to help the
+* New (simple) internal function `plotCoord()` to help the
 visualization of the coordinate validation process
 
+* New (simple) internal function `fixEncoding()` to help solving 
+common encoding problems of text in 'latin1'
+
 * Inclusion of an argument to make optional the prints of functions:
 `prepFamily()`, `summaryData()`, `summaryFlags()` and `validateTax()`
 
@@ -25,21 +32,34 @@ separately for the taxonomic, locality and coordinate information. We also fixed
 a bug related to the homogenization of the geographical and locality information
 
 * Function `prepTDWG()` (and consequently `prepName()`) now provides the
-argument 'pretty' which controls the way in which the output names are
-presented. By default the function returns, as before, names in a 'pretty' way
-(i.e. only the first letter of names capitalized, initials separated by points
-and no spaces, and family name prepositions in lower cases). But now the
-function also returns the names in the desired format but presented in the same
-way as the input names
+argument 'pretty' which controls how the output names are presented. By default
+the function returns, as before, names in a 'pretty' way (i.e. only the first
+letter of names capitalized, initials separated by points and no spaces, and
+family name prepositions in lower cases). But now the function also returns the
+names in the desired format but presented in the same way as the input names
 
-* Minor improvements in functions `formatDwc()`, `checkList()`, ,
-`fixSpecies()`, `prepName()`, `getPrep()`, `getInit()`, `lastName()`,
-`prepFamily()` and `addRank()`.
+* Function `formatDwc()` now accepts data downloaded from the BIEN database and
+it includes a dataset-specific option to solve common latin1 encoding problems
+
+* Function `fixName()` now includes an option to detect and solve potentially
+problematic cases when the name notation uses commas to separate multiple
+people's names. This option is controlled by the new argument `bad.comma`
+
+* Minor improvements in `checkList()`, `fixSpecies()`, `prepName()`,
+`getPrep()`, `getInit()`, `getYear()`, `colNumber()`, `lastName()`,
+`checkCoord()`, `mahalanobisDist()`, `prepFamily()` and `addRank()`.
 
 * Adding tests to most of the package functions
 
+* Solving some minor problems in the world map objects to make them compatible
+to the use of the spherical geometry operators of package __s2__, which is now
+the default of package __sf__ version >= 1.0
+
 
-# plantR 0.1.3
+
+<br/>
+
+## version 0.1.3
 
 * Fixing bugs in `formatDwc()` related to the difference in the number of
 columns that speciesLink returns for queries using different taxa and for the
@@ -57,7 +77,9 @@ binding of columns with different data types.
 `formatTax()`
 
 
-# plantR 0.1.2
+<br/>
+
+## version 0.1.2
 
 * New tutorial on how __plantR__ can be used to update databases of biological
 collections (currently in Portuguese, only)
@@ -83,9 +105,11 @@ prioritize the merge of taxonomic information within duplicates
 list of taxonomists related to the new tutorial 
 
 
-# plantR 0.1.1
+<br/>
 
-* Added function `readData()` to read DwC-A zip files from GBIF
+## version 0.1.1
+
+* New function `readData()` to read DwC-A zip files from GBIF
 
 * `checkCoord()` now supports user provided maps
 
@@ -94,6 +118,8 @@ list of taxonomists related to the new tutorial
 * Added a `NEWS.md` file to track changes to the package.
 
 
-# plantR 0.1.0
+<br/>
+
+## version 0.1.0
 
 * The first public version of the package in GitHub.
@@ -154,7 +154,7 @@ minDist <- function(lon, lat, min.dist = 0.001, output = NULL) {
 #'   (`method`= 'robust'). The two methods take into account the geographical
 #'   center of the coordinates distribution and the spatial covariance between
 #'   them. But they vary in the way the covariance matrix of the distribution is
-#'   defined: the classic method uses an approach based on Pearson’s method,
+#'   defined: the classic method uses an approach based on Pearson's method,
 #'   while the robust method uses a Minimum Covariance Determinant (MCD)
 #'   estimator.
 #'
@@ -321,13 +321,21 @@ mahalanobisDist <- function(lon, lat, method = NULL, n.min = 5, digs = 4,
       if (class(rob) == "try-error") {
         df1$lon2 <- jitter(df1$lon, factor = 0.001)
         df1$lat2 <- jitter(df1$lat, factor = 0.001)
-        rob <- robustbase::covMcd(df1[use_these, c("lon2", "lat2")],
-                                  alpha = 1 / 2, tol = 1e-20)
-        res0 <- cbind.data.frame(dup.coord.ID = df1$dup.coord.ID,
-                                 res = sqrt(stats::mahalanobis(df1[, c("lon2", "lat2")],
-                                                               center = rob$center,
-                                                               cov = rob$cov, tol=1e-20)))
+        rob <- suppressWarnings(try(
+                  robustbase::covMcd(df1[use_these, c("lon2", "lat2")],
+                                  alpha = 1 / 2, tol = 1e-20), TRUE))
+        if (class(rob) == "try-error") {
+          res0 <- cbind.data.frame(dup.coord.ID = df1$dup.coord.ID,
+                                   res = NA_character_)
+        } else {
+          res0 <- cbind.data.frame(dup.coord.ID = df1$dup.coord.ID,
+                                   res = sqrt(stats::mahalanobis(df1[, c("lon2", "lat2")],
+                                                                 center = rob$center,
+                                                                 cov = rob$cov, tol=1e-20)))
+        }
+
       } else {
+
         if (length(rob$singularity) > 0) {
           df1$lon2 <- jitter(df1$lon, factor = 0.005)
           df1$lat2 <- jitter(df1$lat, factor = 0.005)
 
@@ -7,6 +7,8 @@
 #'
 #' @author Andrea Sánchez-Tapia & Sara Mortara
 #'
+#' @encoding UTF-8
+#'
 #' @keywords internal
 #'
 shares_border <- function(country1 = "brazil",
@@ -76,6 +78,8 @@ shares_border <- function(country1 = "brazil",
 #'
 #' @author Andrea Sánchez-Tapia & Sara Mortara
 #'
+#' @encoding UTF-8
+#'
 #' @export checkBorders
 #'
 checkBorders <- function(x,
 
@@ -60,6 +60,7 @@
 #' the validation of the locality information (see function `getLoc()` and the
 #' default __plantR__ maps 'worldMap' and 'latamMap').
 #'
+#' @encoding UTF-8
 #'
 #' @export checkCoord
 #'
@@ -99,7 +100,7 @@ checkCoord <- function(x,
   ##Preliminary edits
   cols.x <- names(x) # original data column names
   x$tmp.order <- 1:nrow(x)
-  x[, str.name][x[, str.name] %in% "no_loc"] <- NA #porque nao é pais (rafl: concordo, mas não achei nehuma funcao onde esse 'no_loc' é gerado; melhor alterar direto na função que obtém o string, getLoc()?)
+  x[, str.name][x[, str.name] %in% "no_loc"] <- NA #porque nao eh pais (rafl: concordo, mas nao achei nehuma funcao onde esse 'no_loc' eh gerado; melhor alterar direto na funcao que obtem o string, getLoc()?)
 
   ##Defining the country, state and county columns
   x <- tidyr::separate(
@@ -119,7 +120,6 @@ checkCoord <- function(x,
     paste0("ok_", x[ids.gazet, res.gazet], "_gazet")
   ids.no.coord <- x[, orig.coord] %in% "no_coord"
   geo.check[ids.no.coord] <- "no_cannot_check"
-  #rafl: checar com mais dados se pode ter NAs ou outras classes
 
   ## Subsetting data for geographical checking
   tmp <- x[is.na(geo.check), ]
@@ -166,6 +166,21 @@ checkCoord <- function(x,
                                       join = sf::st_intersects))
   names(tmp)[which(names(tmp) == "NAME_0")] <- "pais_wo"
 
+  ##Solving misterious problems with the country map (could not iolate the problem)
+  check_these <- grepl("\\.[0-9]", rownames(tmp))
+  if (any(check_these)){
+    tmp$keep_these <- rep(TRUE, dim(tmp)[1])
+    dup.orders <- tmp$tmp.order[check_these]
+    for(i in seq_along(dup.orders)) {
+      dups.i <- tmp[tmp$tmp.order %in% dup.orders[i], ]
+      dups.i$keep_these[dups.i$country.new != dups.i$pais_wo] <- FALSE
+      if (all(dups.i$keep_these))
+        dups.i$keep_these[-1] <- FALSE
+      tmp$keep_these[tmp$tmp.order %in% dup.orders[i]] <- dups.i$keep_these
+    }
+    tmp <- tmp[tmp$keep_these, ]
+  }
+
   ##Defining which coordinates fall into the sea (i.e. original coordinates but no country, state or county)
   geo.check[is.na(geo.check)][is.na(tmp$pais_wo)] <- "sea"
 
@@ -215,33 +230,32 @@ checkCoord <- function(x,
   # cria o vetor para checar
   x2$loc.coord <- paste(x2$NAME_0, x2$NAME_1, x2$NAME_2, sep = "_")
   x2$loc.coord[x2$loc.coord %in% "NA_NA_NA"] <- NA_character_
-  x2$loc.coord <- gsub("_NA_NA$", "", x2$loc.coord, perl = TRUE) #rafl: necessário, certo?
-  x2$loc.coord <- gsub("_NA$", "", x2$loc.coord, perl = TRUE) #rafl: necessário, certo?
-  # ast: na real loc.coord nao é usado mais.então tudo isto poderia sumir.
-  # rafl: vdd, mas acho legal a possibilidade de retornar essa info. Pode ajudar na gestão/correção de coleções.
+  x2$loc.coord <- gsub("_NA_NA$", "", x2$loc.coord, perl = TRUE)
+  x2$loc.coord <- gsub("_NA$", "", x2$loc.coord, perl = TRUE)
 
   # recupera todas as linhas
   x3 <- suppressMessages(
     dplyr::left_join(x,
                      x2[,c("tmp.order",
                            "NAME_0", "NAME_1", "NAME_2", "NAME_3",
                            "loc.coord")]))
-  #ast: eu nao sei se vc está tirando colunas aqui mas pelo menos tirei o by que ia criar colunas duplicadas.
-  #rafl: ok! removi o geo.check e adicionei o suppressWarnings
 
   ### GEO-VALIDATION STEPS ###
   ##1- Validating the coordinates at different levels - exact matches
   #1.1 Country-level: good country? All countries
   x3$country.check <- dplyr::if_else(x3$country.gazet == x3$NAME_0,
-                                     "ok_country", "bad_country", missing = "no_country")
+                                     "ok_country", "bad_country",
+                                     missing = "no_country")
 
   #1.2 State-level: good state? All countries
   x3$state.check <- dplyr::if_else(x3$state.gazet == x3$NAME_1,
-                                   "ok_state", "bad_state", missing = "no_state")
+                                   "ok_state", "bad_state",
+                                   missing = "no_state")
 
   #1.3 County-level. All countries
   x3$county.check <- dplyr::if_else(x3$county.gazet == x3$NAME_2,
-                                    "ok_county", "bad_county", missing = "no_county")
+                                    "ok_county", "bad_county",
+                                    missing = "no_county")
 
   ## Updating geo.check
   tmp1 <- apply(x3[ , c("country.check",
 
@@ -43,10 +43,12 @@
 #'
 #' @importFrom sf st_as_sf st_crs st_set_crs st_coordinates st_join st_intersects st_geometry
 #'
-#' @export checkInverted
-#'
 #' @author Andrea Sánchez-Tapia, Sara Mortara & Renato A. F. de Lima
 #'
+#' @encoding UTF-8
+#'
+#' @export checkInverted
+#'
 checkInverted <- function(x,
                           check.names = c("geo.check", "border.check", "shore.check"),
                           country.gazetteer = "country.gazet",
@@ -127,7 +129,7 @@ checkInverted <- function(x,
     tmp$inv_lat <- -tmp[, lat]
     inv_lon <- "inv_lon"
     inv_lat <- "inv_lat"
-    #rafl: no codigo antigo eu fazia apenas os casos 1,2,3 e 4. Se me lembro bem, não achei os casos 5 e 6. Mas o 7 deve ter...
+    #rafl: no codigo antigo eu fazia apenas os casos 1,2,3 e 4. Se me lembro bem, nao achei os casos 5 e 6. Mas o 7 deve ter...
     types <- list(invert_lon = c(inv_lon, lat),
                invert_lat = c(lon, inv_lat),
                invert_both = c(inv_lon, inv_lat), #signos
 
@@ -87,7 +87,7 @@
 #' Conservation, 252: 108825.
 #'
 #' Liu, C., White, M., and Newell, G. 2018. Detecting outliers in species
-#' distribution data. Journal of Biogeography, 45(1): 164–176.
+#' distribution data. Journal of Biogeography, 45(1): 164-176.
 #'
 #'
 #' @seealso
@@ -105,7 +105,7 @@ checkOut <- function(x,
                      tax.name = "scientificName.new",
                      geo.name = "geo.check",
                      cult.name = "cult.check",
-                     n.min = 5,
+                     n.min = 6,
                      center = "median",
                      geo.patt = "ok_",
                      cult.patt = NA,
@@ -117,11 +117,14 @@ checkOut <- function(x,
   robust.cut <- out.check <- NULL
 
   ## check input
-  if (!class(x) == "data.frame")
+  if (!class(x)[1] == "data.frame")
     stop("Input object needs to be a data frame!")
 
+  if (dim(x)[1] == 0)
+    stop("Input data frame is empty!")
+
   if (!all(c(lat, lon) %in% colnames(x)))
-    stop("Coordinate column names do not match those of the input object: please rename or specify the correct names")
+    stop("Coordinates column names do not match those of the input object: please rename or specify the correct names")
 
   if (!tax.name %in% colnames(x)) {
     rm.tax <- TRUE
@@ -157,7 +160,6 @@ checkOut <- function(x,
                                      cult.patt = cult.patt),
      by = c("tax.wrk")]
 
-
   dt[!is.na(lon.wrk) & !is.na(lat.wrk),
      maha.robust := mahalanobisDist(lon.wrk, lat.wrk, n.min = n.min,
                                     method = "robust",
 
@@ -45,6 +45,8 @@
 #'
 #' @author Andrea Sánchez-Tapia, Sara Mortara & Renato A. Ferreira de Lima
 #'
+#' @encoding UTF-8
+#'
 #' @export checkShore
 checkShore <- function(x,
                      geo.check = "geo.check",
@@ -77,7 +79,7 @@ checkShore <- function(x,
     if (type == "buffer") {
       land <- landBuff
       test_shore <-
-        suppressMessages(sf::st_intersects(tmp, land, by_element = TRUE))
+        suppressMessages(sf::st_intersects(tmp, land))
       shore.check <- lengths(test_shore) == 1
     }