-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathdarwincore2csv.R
More file actions
45 lines (36 loc) · 1.4 KB
/
darwincore2csv.R
File metadata and controls
45 lines (36 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# this script takes a darwin core archive and extracts all distinct occurrences from it,
# writing them to a csv file in the data/filtered/ directory
library(finch) # reads darwincore zip file
library(dplyr) # data munging library
# what to write in the data file and output csv file name
genus.name <- "Equus"
species.name <- "africanus"
# for the workflow of unpacking a zip file downloaded from gbif into the
# /data/domesticated folder these variables can be left unchanged.
# REPO_HOME is the parent of the current working directory; paths are built
# with file.path() so no doubled separators end up in the file names
REPO_HOME <- file.path(getwd(), "..")
taxon.dir <- paste(genus.name, species.name, sep = "_")
infile.name <- file.path(
  REPO_HOME, "data", "domesticated", taxon.dir, "darwincore.zip"
)
outfile.name <- file.path(
  REPO_HOME, "data", "filtered", paste0(taxon.dir, ".csv")
)
# fail early with a readable message instead of an error from inside finch
if (!file.exists(infile.name)) {
  stop("darwin core archive not found: ", infile.name, call. = FALSE)
}
# read the occurrences file from darwincore archive
# (the finch cache is emptied first, before the archive is read)
finch::dwca_cache$delete_all()
infile.dwca_gbif <- finch::dwca_read(infile.name, read = TRUE)
occurrences.df <- infile.dwca_gbif$data[["occurrence.txt"]]
if (is.null(occurrences.df)) {
  stop("no occurrence.txt found in archive: ", infile.name, call. = FALSE)
}
# select the id and coordinate columns, renaming them to snake_case
occurrences.df <- dplyr::select(
  occurrences.df,
  gbif_id = gbifID,
  decimal_latitude = decimalLatitude,
  decimal_longitude = decimalLongitude
)
# keep the first record for every distinct latitude/longitude pair;
# columns are addressed by name so a change to the select() above cannot
# silently shift which columns are compared
# NOTE(review): records without coordinates are not dropped here, so one
# NA/NA row can survive - confirm whether downstream steps expect that
occurrences.df <- dplyr::distinct(
  occurrences.df,
  decimal_latitude,
  decimal_longitude,
  .keep_all = TRUE
)
# add taxon name column
occurrences.df$taxon_name <- paste(genus.name, species.name)
# reorder columns
occurrences.df <- occurrences.df[
  ,
  c("gbif_id", "taxon_name", "decimal_latitude", "decimal_longitude")
]
# write out file, creating the output directory when it does not exist yet
dir.create(dirname(outfile.name), recursive = TRUE, showWarnings = FALSE)
write.csv(
  occurrences.df,
  file = outfile.name,
  quote = FALSE,
  eol = "\n",
  row.names = FALSE
)