Skip to content

Commit 8936beb

Browse files
authored
Merge pull request #32 from NEONScience/deve
Deve
2 parents 3b14e1a + 86b10f9 commit 8936beb

File tree

8 files changed

+420
-46
lines changed

8 files changed

+420
-46
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
##############################################################################################
2+
#' @title Workflow for downloading dp04 data from unpublished file list in GCS
3+
4+
#' @author
5+
#' David Durden \email{eddy4R.info@gmail.com}
6+
7+
#' @description
8+
#' Workflow. Downloading unpublished SAE data from GCS.
9+
10+
#' @param Currently none
11+
12+
#' @return Currently none
13+
14+
#' @references
15+
16+
#' @keywords eddy-covariance, NEON
17+
18+
#' @examples Currently none
19+
20+
#' @seealso Currently none
21+
22+
# changelog and author contributions / copyrights
23+
# David (2024-09-23)
24+
# original creation
25+
##############################################################################################
26+
27+
#Site for analysis
28+
Site <- "KONA"
29+
30+
#Date begin
31+
dateBgn <- as.Date("2024-09-01")
32+
#Date end
33+
dateEnd <- as.Date("2024-09-10")
34+
35+
#Download directory
36+
DirDnld <- tempdir()
37+
38+
#Unpublished SAE file list
39+
listFile <- read.csv("https://storage.googleapis.com/neon-sae-files/ods/sae_files_unpublished/sae_file_url_unpublished.csv")
40+
41+
#Date interval
42+
setDate <- lubridate::interval(start = dateBgn, end = dateEnd)
43+
44+
#Subset file list by dates and site
45+
listFileSub <- listFile[as.Date(listFile$date) %within% setDate & listFile$site == Site,]
46+
47+
#Download filename (full path)
48+
fileDnld <- paste0(DirDnld,"/", str_extract(string = listFileSub$url,pattern = "NEON.*.h5$"))
49+
50+
#Download data
51+
lapply(seq_along(listFileSub$url), function(x){
52+
download.file(url = listFileSub$url[x], destfile = fileDnld[x])
53+
})
54+
55+
#Read in data
56+
dp04 <- neonUtilities::stackEddy(DirDnld, level = "dp04")

pack/eddy4R.base/R/def.hdf5.wrte.dp01.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ lapply(names(inpList$qfqm[[Dp01]]), function(x) {
157157
#convert to integer
158158
inpList$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfFinl)
159159
#convert to integer
160-
inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
160+
#inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
161161
#Write 30-min qfqm output to HDF5
162162
rhdf5::h5writeDataset.data.frame(obj = inpList$qfqm[[Dp01]][[x]][,c("qfFinl","timeBgn","timeEnd")], h5loc = idQfqm30, name = x, DataFrameAsCompound = TRUE)})
163163

@@ -167,7 +167,7 @@ lapply(names(inpList$dp01AgrSub$qfqm[[Dp01]]), function(x) {
167167
#convert to integer
168168
inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl)
169169
#convert to integer
170-
inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw)
170+
#inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw)
171171
#Write 1-min output to HDF5
172172
rhdf5::h5writeDataset.data.frame(obj = inpList$dp01AgrSub$qfqm[[Dp01]][[x]][,c("qfFinl","timeBgn","timeEnd")], h5loc = idQfqm01, name = x, DataFrameAsCompound = TRUE)})
173173
}
@@ -178,7 +178,7 @@ lapply(names(inpList$dp01AgrSub$qfqm[[Dp01]]), function(x) {
178178
#convert to integer
179179
inpList$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfFinl)
180180
#convert to integer
181-
inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
181+
#inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
182182
#Write 30-min qfqm output to HDF5
183183
rhdf5::h5writeDataset.data.frame(obj = inpList$qfqm[[Dp01]][[x]], h5loc = idQfqm30, name = x, DataFrameAsCompound = TRUE)})
184184

@@ -189,7 +189,7 @@ if(MethSubAgr == TRUE){
189189
#convert to integer
190190
inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl)
191191
#convert to integer
192-
inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw)
192+
#inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw)
193193
#Write 1-min output to HDF5
194194
rhdf5::h5writeDataset.data.frame(obj = inpList$dp01AgrSub$qfqm[[Dp01]][[x]], h5loc = idQfqm01, name = x, DataFrameAsCompound = TRUE)})
195195
}

pack/eddy4R.qaqc/R/wrap.dp01.qfqm.ecse.R

Lines changed: 193 additions & 29 deletions
Large diffs are not rendered by default.

pack/eddy4R.qaqc/R/wrap.dp01.qfqm.eddy.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ wrap.dp01.qfqm.eddy <- function(
111111

112112

113113
#assign default qfSciRevw
114-
lapply(names(tmp), function(x) tmp[[x]]$qfqm$qfSciRevw <<- 0)
114+
lapply(names(tmp), function(x) tmp[[x]]$qfqm$qfSciRevw <<- NaN)
115115
#Only report expanded quality metrics if producing expanded file
116116
if(RptExpd == TRUE){
117117
#calculate quality metrics (pass, fail, NA for each flag)
@@ -128,7 +128,7 @@ wrap.dp01.qfqm.eddy <- function(
128128
lapply(names(tmp), function(x) rpt$qmAlph[[x]] <<- tmp[[x]]$qfqm$qmAlph)
129129
lapply(names(tmp), function(x) rpt$qmBeta[[x]] <<- tmp[[x]]$qfqm$qmBeta)
130130
lapply(names(tmp), function(x) rpt$qfFinl[[x]] <<- as.integer(tmp[[x]]$qfqm$qfFinl))
131-
lapply(names(tmp), function(x) rpt$qfSciRevw[[x]] <<- as.integer(tmp[[x]]$qfqm$qfSciRevw))
131+
lapply(names(tmp), function(x) rpt$qfSciRevw[[x]] <<- tmp[[x]]$qfqm$qfSciRevw)
132132

133133
# Convert output to dataframe's
134134
rpt$qmAlph <- base::rbind.data.frame(rpt$qmAlph)

pack/eddy4R.qaqc/man/def.plau.Rd

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pack/eddy4R.stor/R/def.shft.time.isoCo2.R

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@
4343
# add header and apply eddy4R terms
4444
# Natchaya Pingintha-Durden (2024-08-20)
4545
# added a failsafe in case all data at some/all measurement level are missing
46+
# Natchaya Pingintha-Durden (2024-11-25)
47+
# update the number of missing data from 0 to 35
48+
# add tryCatch() when kmean can not be determine
4649
####################################################################################################
4750
def.shft.time.isoCo2 <- function (
4851
dataList,
@@ -123,8 +126,8 @@ def.shft.time.isoCo2 <- function (
123126
medTmp <- medTmp[complete.cases(medTmp$rtioMoleDryCo2), ]
124127
highTmp <- highTmp[complete.cases(highTmp$rtioMoleDryCo2), ]
125128

126-
# need to stop if some df are missing:
127-
if (nrow(lowTmp) == 0 || nrow(medTmp) == 0 || nrow(highTmp) == 0) {
129+
# need to stop if some df are missing or less than 1 minute avialable data (~35):
130+
if (nrow(lowTmp) <= 35 || nrow(medTmp) <= 35 || nrow(highTmp) <= 35) {
128131
return(rpt) # some reference data missing, following steps will fail,
129132
# so just return the input list
130133
}
@@ -147,9 +150,39 @@ def.shft.time.isoCo2 <- function (
147150

148151
#when ofstLow, ofstMed or ofstHigh is NA using k-mean clustering method determine the index
149152
#using k-mean clustering method determine if there is a time offset, and exit if there is not.
150-
kmeanLow <- stats::kmeans(lowTmp$rtioMoleDryCo2, centers = 2)
151-
kmeanMed <- stats::kmeans(medTmp$rtioMoleDryCo2, centers = 2)
152-
kmeanHigh <- stats::kmeans(highTmp$rtioMoleDryCo2, centers = 2)
153+
#Error could happened when kmeans not able to distinct data. To eliminate this error, centers needs to change from 2 to 1
154+
#List of temporary variables
155+
kmeanTmp <- list("kmeanLow", "kmeanMed", "kmeanHigh")
156+
157+
#Function for the next job (to handle error and change centers)
158+
nextJob <- function(tmpTab, idx) {
159+
cat("Proceeding to the next job with centers = 1...\n")
160+
# Assign result with centers = 1
161+
tmp <- stats::kmeans(tmpTab$rtioMoleDryCo2, centers = 1)
162+
return(tmp)
163+
}
164+
165+
for (idx in 1:3) {
166+
#Select appropriate tmpTab based on idx
167+
if (idx == 1) tmpTab <- lowTmp
168+
if (idx == 2) tmpTab <- medTmp
169+
if (idx == 3) tmpTab <- highTmp
170+
171+
#Attempt to perform kmeans clustering
172+
tryCatch({
173+
# Attempt with centers = 2
174+
kmeanTmp[[idx]] <- stats::kmeans(tmpTab$rtioMoleDryCo2, centers = 2)
175+
}, error = function(e) {
176+
#If error occurs, print message and proceed to the next job
177+
cat("Error with centers = 2, changing to centers = 1: ", e$message, "\n")
178+
#Call nextJob to attempt with centers = 1
179+
kmeanTmp[[idx]] <<- nextJob(tmpTab, idx)
180+
})
181+
}
182+
#Assign kmean to each table
183+
kmeanLow <- kmeanTmp[[1]]
184+
kmeanMed <- kmeanTmp[[2]]
185+
kmeanHigh <- kmeanTmp[[3]]
153186

154187
#get index when cluster group changed
155188
ofstKmeanLow <- which(kmeanLow$cluster != kmeanLow$cluster[1])[1]
@@ -166,6 +199,10 @@ def.shft.time.isoCo2 <- function (
166199
if (length(ofstMed) > 1) {ofstMed <- ofstMed[1]}
167200
if (length(ofstHigh) > 1) {ofstHigh <- ofstHigh[1]}
168201

202+
#return rpt when one of ofst is NA
203+
if (is.na(ofstLow) | is.na(ofstMed) | is.na(ofstMed)) {
204+
return(rpt)
205+
}
169206

170207
# get step and time offsets.
171208
stepOffsetLow <- hms::as_hms(difftime(as.POSIXct(lowTmp$time[ofstLow], format="%Y-%m-%dT%H:%M:%S", tz="GMT"),

pack/eddy4R.stor/R/def.shft.time.isoH2o.R

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
# original creation developed Rich's core work for def.shft.time.isoCo2
3434
# Natchaya Pingintha-Durden (2024-08-20)
3535
# added a failsafe in case all data at some/all measurement level are missing
36+
# Natchaya Pingintha-Durden (2024-09-19)
37+
# fixed issues when time correction cannot be determined due to NaN data in stusN2
3638
####################################################################################################
3739
def.shft.time.isoH2o <- function (
3840
dataList,
@@ -125,6 +127,9 @@ def.shft.time.isoH2o <- function (
125127
allData <- do.call(rbind, list(highData, medData, lowData, wrkLvlData))
126128
allData <- allData[order(allData$time), ]
127129

130+
#return the input list if data from all stusN2 are missing:
131+
if (all(is.na(allData$stusN2))) {return(rpt)}
132+
128133
###############################################################################
129134
#get first index when vaporizer 3-way valve turn on (1)
130135
idxValvHead <- head(which(allData$valv == 1), n=1)
@@ -138,7 +143,8 @@ def.shft.time.isoH2o <- function (
138143

139144

140145
#calculate time difference between valvCrdH2o and vaporizer 3-way valve
141-
if ((idxValvHead == 1 | idxValvCrdH2oHead == 1) & allData$injNum[1] != 1){
146+
if (((idxValvHead == 1 | idxValvCrdH2oHead == 1) & allData$injNum[1] != 1) ||
147+
length(idxValvHead) == 0 || length(idxValvCrdH2oHead) == 0){
142148
#assign NA to time difference between valvCrdH2o and vaporizer 3-way valve
143149
#if the first injection occurred in previous day and the time difference cannot determine
144150
timeOfstHead <- NA
@@ -147,7 +153,8 @@ def.shft.time.isoH2o <- function (
147153
as.POSIXct(allData$time[idxValvHead], format="%Y-%m-%dT%H:%M:%S", tz="GMT")))
148154
}
149155

150-
if ((idxValvTail == nrow(allData) | idxValvCrdH2oTail == nrow(allData)) & allData$injNum[nrow(allData)] != 18){
156+
if (((idxValvTail == nrow(allData) | idxValvCrdH2oTail == nrow(allData)) & allData$injNum[nrow(allData)] != 18) ||
157+
length(idxValvTail) == 0 || length(idxValvCrdH2oTail) == 0){
151158
#assign NA to time difference between valvCrdH2o and vaporizer 3-way valve
152159
#if the last injection (injNum = 18) occurred in next day and the time difference cannot determine
153160
timeOfstTail <- NA
@@ -156,6 +163,9 @@ def.shft.time.isoH2o <- function (
156163
as.POSIXct(allData$time[idxValvTail], format="%Y-%m-%dT%H:%M:%S", tz="GMT")))
157164
}
158165

166+
#return the input list if data from both timeOfstHeand timeOfstTail cannot be determined:
167+
if (is.na(timeOfstHead) & is.na(timeOfstTail)) {return(rpt)}
168+
159169
#get mean ofset
160170
timeOfstMean <- as.numeric(mean(c(timeOfstHead, timeOfstTail), na.rm = TRUE))
161171

0 commit comments

Comments
 (0)