-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02.CleanWTData.R
More file actions
117 lines (97 loc) · 4.6 KB
/
02.CleanWTData.R
File metadata and controls
117 lines (97 loc) · 4.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# ---
# title: BAM dataset - clean WildTrax data
# author: Elly Knight
# created: March 2, 2026
# ---
#NOTES################################
#PURPOSE: This script tidies the WT data downloads into objects that can then be harmonized with eBird data to produce the final dataset.
#Note that surveys done with acoustic recorders and transcribed to point count and stored in the point count sensor (survey_distance_method=="0m-INF-ARU") are assigned a point count method type because they are legacy data that were likely transcribed through continuous listening without the use of a spectrogram.
#PREAMBLE############################
#1. Load packages----
library(tidyverse) #basic data wrangling
library(wildrtrax) #to tidy data from wildtrax
#2. Set root path for data on google drive----
#the raw input .Rdata is read from, and the cleaned output .Rdata is written to, the WildTrax subfolder of this path
root <- "G:/Shared drives/BAM_AvianData/BAMDataset"
#3. Login to WildTrax----
#WTlogin.R is sourced from the current working directory (not from root)
#NOTE(review): presumably it sets the credentials that wt_auth() reads - confirm
source("WTlogin.R")
wt_auth()
#4. Set the WT version ----
#this string is used as the subfolder name under root/WildTrax and inside the input and output file names
v.wt <- "2026-03-09"
#5. Get the downloaded data object ----
#restores the objects stored in 01_wildtrax_raw_<v.wt>.Rdata into the workspace
#NOTE(review): aru.wt and pc.wt are used below but never created in this script, so presumably they come from this file - confirm
load(file.path(root, "WildTrax", v.wt, paste0("01_wildtrax_raw_", v.wt, ".Rdata")))
#TIDY ARU DATA###########
#1. Collapse to single dataframe ----
#aru.wt is not created in this script
#NOTE(review): presumably it is the list of ARU reports restored by load() in the preamble - confirm
aru <- do.call(rbind, aru.wt)

#2. Tidy and format ----
#we have to keep only the first detection for each "individual_order" because some individuals have multiple tags
#slice_min(with_ties = FALSE) keeps exactly one tag per individual. The previous
#detection_time == min(detection_time) filter kept every tag tied for first (so the
#individual's count was summed more than once) and dropped the individual entirely
#when any of its detection times was NA, because min() then returns NA.
aru.tidy <- aru |>
  wt_tidy_species(remove = c("abiotic", "insect", "human")) |>
  wt_replace_tmtt() |>
  rename(date_time = recording_date_time,
         duration = task_duration,
         method = task_method,
         survey_id = task_id,
         status = task_is_complete) |>
  #ARU detections are given an unlimited distance; the placeholder code "species" becomes "UNKN"
  mutate(distance = Inf,
         sensor = "ARU",
         species = ifelse(species_code == "species", "UNKN", species_code)) |>
  #one group per individual of each species within each survey
  group_by(organization, project_id, location_id, location_buffer_m,
           longitude, latitude, survey_id, date_time, status, method,
           duration, distance, max_noise_type, max_noise_volume,
           species, individual_order) |>
  slice_min(detection_time, n = 1, with_ties = FALSE) |>
  #drop individual_order from the grouping so counts are summed per species within each survey
  ungroup(individual_order) |>
  #.groups = "drop" returns an ungrouped result with the grouping columns followed by count
  summarize(count = sum(individual_count), .groups = "drop")
#TIDY PC DATA############
#1. Collapse to a single dataframe ----
#pc.wt is not created in this script
#NOTE(review): presumably it is the list of point count reports restored by load() in the preamble - confirm
pc <- do.call(rbind, pc.wt)

#2. Tidy and format ----
#duration: the number directly before "min" in survey_duration_method, multiplied by 60
#distance: Inf when survey_distance_method ends in "INF" or "ARU"; otherwise the number in
#the final "<n>m" band of the string (e.g. 100 for "0m-50m-100m").
#The previous pattern "(?<=-)[0-9]+(?=m?)" had an optional lookahead that always matched,
#so it returned the first number after a dash (50 in the example) instead of the outer distance.
#Strings that end in neither "INF"/"ARU" nor "<n>m" get NA.
#The final select() gives pc.tidy the same columns, in the same order, as aru.tidy.
pc.tidy <- pc |>
  wt_tidy_species(remove = c("abiotic", "insect", "human")) |>
  rename(date_time = survey_date) |>
  mutate(
    method = "PC",
    duration = as.integer(str_extract(survey_duration_method,
                                      "(?<=-)[0-9]+(?=min?)")) * 60,
    distance = ifelse(
      str_sub(survey_distance_method, -3, -1) %in% c("INF", "ARU"),
      Inf,
      as.integer(str_extract(survey_distance_method,
                             "(?<=-)[0-9]+(?=m$)"))
    ),
    #noise columns are filled with NA for point counts
    max_noise_type = NA,
    max_noise_volume = NA,
    count = as.integer(individual_count),
    species = ifelse(species_code == "species", "UNKN", species_code),
    #every point count row is given a TRUE status
    status = TRUE
  ) |>
  dplyr::select(all_of(colnames(aru.tidy)))
#PUT TOGETHER#########
#1. Combine ----
#stack the ARU and point count detections into one long table
wt.tidy <- rbind(aru.tidy, pc.tidy)

#2. Filter out tasks we don't want ----
wt.use <- wt.tidy |>
  #keep rows that have a method and a status of "t" or "TRUE"
  dplyr::filter(method != "None",
                status %in% c("t", "TRUE")) |>
  #drop extreme noise and ARU malfunctions, but keep rows with no noise information
  dplyr::filter(is.na(max_noise_volume) | max_noise_volume != "Extreme",
                is.na(max_noise_type) | !(max_noise_type %in% c("ARU Malfunction"))) |>
  #keep locations with a missing or zero buffer
  dplyr::filter(is.na(location_buffer_m) | location_buffer_m == 0) |>
  #require the fields needed downstream
  dplyr::filter(!is.na(duration),
                !is.na(distance),
                !is.na(latitude),
                !is.na(date_time)) |>
  #only use species with 4 letter codes
  dplyr::filter(str_length(species) == 4,
                species != "4794") |>
  #filter to approximately North America
  dplyr::filter(latitude > 10,
                latitude < 85,
                longitude < -52,
                longitude > -168) |>
  #clean up some bird codes
  mutate(species = case_when(species == "GRAJ" ~ "CAJA",
                             species == "PSFL" ~ "WEFL",
                             species == "MEGU" ~ "COGU",
                             TRUE ~ species))

#the combined long table is no longer needed once wt.use exists
rm(wt.tidy)
#3. Make wide ----
#we don't use wt_make_wide() because we're using a different format now
#one column per species code holding the summed count; combinations with no rows are filled with 0
wt.wide <- pivot_wider(wt.use,
                       names_from = species,
                       values_from = count,
                       values_fn = sum,
                       values_fill = 0) |>
  #remove columns we don't need anymore
  dplyr::select(!c(status, location_buffer_m, max_noise_type, max_noise_volume))

#4. Save ----
#written next to the raw download, under the same WildTrax version folder
save(wt.wide,
     file = file.path(root, "WildTrax", v.wt,
                      paste0("02_wildtrax_clean_", v.wt, ".Rdata")))