NEONScience
diff --git a/‎flow/tool/flow.dnld.dp04.unpb.gcs.R‎
Lines changed: 56 additions & 0 deletions b/‎flow/tool/flow.dnld.dp04.unpb.gcs.R‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎pack/eddy4R.base/R/def.hdf5.wrte.dp01.R‎
Lines changed: 4 additions & 4 deletions b/‎pack/eddy4R.base/R/def.hdf5.wrte.dp01.R‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎pack/eddy4R.qaqc/R/wrap.dp01.qfqm.ecse.R‎
Lines changed: 193 additions & 29 deletions b/‎pack/eddy4R.qaqc/R/wrap.dp01.qfqm.ecse.R‎
Lines changed: 193 additions & 29 deletions
diff --git a/‎pack/eddy4R.qaqc/R/wrap.dp01.qfqm.eddy.R‎
Lines changed: 2 additions & 2 deletions b/‎pack/eddy4R.qaqc/R/wrap.dp01.qfqm.eddy.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pack/eddy4R.qaqc/man/def.plau.Rd‎
Lines changed: 0 additions & 1 deletion b/‎pack/eddy4R.qaqc/man/def.plau.Rd‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎pack/eddy4R.stor/R/def.shft.time.isoCo2.R‎
Lines changed: 42 additions & 5 deletions b/‎pack/eddy4R.stor/R/def.shft.time.isoCo2.R‎
Lines changed: 42 additions & 5 deletions
diff --git a/‎pack/eddy4R.stor/R/def.shft.time.isoH2o.R‎
Lines changed: 12 additions & 2 deletions b/‎pack/eddy4R.stor/R/def.shft.time.isoH2o.R‎
Lines changed: 12 additions & 2 deletions
@@ -0,0 +1,56 @@
+##############################################################################################
+#' @title Workflow for downloading dp04 data from unpublished file list in GCS
+
+#' @author
+#' David Durden \email{eddy4R.info@gmail.com}
+
+#' @description
+#' Workflow. Downloading unpublished SAE data from GCS.
+
+#' @param Currently none
+
+#' @return Currently none
+
+#' @references
+
+#' @keywords eddy-covariance, NEON
+
+#' @examples Currently none
+
+#' @seealso Currently none
+
+# changelog and author contributions / copyrights
+#   David (2024-09-23)
+#     original creation
+##############################################################################################
+
+#Workflow steps:
+#  1) read the list of unpublished SAE files from GCS
+#  2) subset the list to one site and an inclusive date range
+#  3) download the matching HDF5 files into a download directory
+#  4) stack the dp04 data from the downloaded files
+
+#Site for analysis
+Site <- "KONA"
+
+#Date begin
+dateBgn <- as.Date("2024-09-01")
+#Date end
+dateEnd <- as.Date("2024-09-10")
+
+#Download directory
+DirDnld <- tempdir()
+
+#Unpublished SAE file list (the date, site and url columns are used below)
+listFile <- read.csv("https://storage.googleapis.com/neon-sae-files/ods/sae_files_unpublished/sae_file_url_unpublished.csv")
+
+#Date interval
+setDate <- lubridate::interval(start = dateBgn, end = dateEnd)
+
+#Subset file list by dates and site
+#%within% is called through its namespace so the script does not rely on lubridate being attached
+setLgclSub <- lubridate::`%within%`(as.Date(listFile$date), setDate) & listFile$site == Site
+#which() drops NA comparisons; indexing with a logical NA would add all-NA rows (and NA urls)
+listFileSub <- listFile[which(setLgclSub),]
+
+#Stop with a clear message if nothing matches, instead of failing later on an empty download directory
+if(nrow(listFileSub) == 0) {
+  stop("No unpublished SAE files found for site ", Site, " between ", format(dateBgn), " and ", format(dateEnd))
+}
+
+#Download filename (full path); the dot before h5 is escaped so only a literal ".h5" suffix matches
+fileDnld <- file.path(DirDnld, stringr::str_extract(string = listFileSub$url, pattern = "NEON.*\\.h5$"))
+
+#Download data
+#mode = "wb" writes the files in binary mode, otherwise HDF5 files get corrupted on Windows
+for(idx in seq_along(listFileSub$url)) {
+  download.file(url = listFileSub$url[idx], destfile = fileDnld[idx], mode = "wb")
+}
+
+#Read in data
+dp04 <- neonUtilities::stackEddy(DirDnld, level = "dp04")
@@ -157,7 +157,7 @@ lapply(names(inpList$qfqm[[Dp01]]), function(x)  {
   #convert to integer
   inpList$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfFinl)
   #convert to integer
-  inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
+  #inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
   #Write 30-min qfqm output to HDF5
   rhdf5::h5writeDataset.data.frame(obj = inpList$qfqm[[Dp01]][[x]][,c("qfFinl","timeBgn","timeEnd")], h5loc = idQfqm30, name = x, DataFrameAsCompound = TRUE)})
 
@@ -167,7 +167,7 @@ lapply(names(inpList$dp01AgrSub$qfqm[[Dp01]]), function(x)  {
   #convert to integer
   inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl) 
   #convert to integer
-  inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw) 
+  #inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw) 
   #Write 1-min output to HDF5
   rhdf5::h5writeDataset.data.frame(obj = inpList$dp01AgrSub$qfqm[[Dp01]][[x]][,c("qfFinl","timeBgn","timeEnd")], h5loc = idQfqm01, name = x, DataFrameAsCompound = TRUE)})
   }
@@ -178,7 +178,7 @@ lapply(names(inpList$dp01AgrSub$qfqm[[Dp01]]), function(x)  {
     #convert to integer
     inpList$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfFinl) 
     #convert to integer
-    inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
+    #inpList$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$qfqm[[Dp01]][[x]]$qfSciRevw)
     #Write 30-min qfqm output to HDF5
     rhdf5::h5writeDataset.data.frame(obj = inpList$qfqm[[Dp01]][[x]], h5loc = idQfqm30, name = x, DataFrameAsCompound = TRUE)})
 
@@ -189,7 +189,7 @@ if(MethSubAgr == TRUE){
     #convert to integer
     inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfFinl) 
     #convert to integer
-    inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw) 
+    #inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw <<- as.integer(inpList$dp01AgrSub$qfqm[[Dp01]][[x]]$qfSciRevw) 
     #Write 1-min output to HDF5
     rhdf5::h5writeDataset.data.frame(obj = inpList$dp01AgrSub$qfqm[[Dp01]][[x]], h5loc = idQfqm01, name = x, DataFrameAsCompound = TRUE)})
   }
 
@@ -111,7 +111,7 @@ wrap.dp01.qfqm.eddy <- function(
 
 
   #assign default qfSciRevw
-  lapply(names(tmp), function(x) tmp[[x]]$qfqm$qfSciRevw <<- 0)
+  lapply(names(tmp), function(x) tmp[[x]]$qfqm$qfSciRevw <<- NaN)
   #Only report expanded quality metrics if producing expanded file
   if(RptExpd == TRUE){
     #calculate quality metrics (pass, fail, NA for each flag)
@@ -128,7 +128,7 @@ wrap.dp01.qfqm.eddy <- function(
   lapply(names(tmp), function(x) rpt$qmAlph[[x]] <<- tmp[[x]]$qfqm$qmAlph)
   lapply(names(tmp), function(x) rpt$qmBeta[[x]] <<- tmp[[x]]$qfqm$qmBeta)
   lapply(names(tmp), function(x) rpt$qfFinl[[x]] <<- as.integer(tmp[[x]]$qfqm$qfFinl))
-  lapply(names(tmp), function(x) rpt$qfSciRevw[[x]] <<- as.integer(tmp[[x]]$qfqm$qfSciRevw))
+  lapply(names(tmp), function(x) rpt$qfSciRevw[[x]] <<- tmp[[x]]$qfqm$qfSciRevw)
 
   # Convert output to dataframe's
   rpt$qmAlph <- base::rbind.data.frame(rpt$qmAlph)
 
@@ -43,6 +43,9 @@
 #     add header and apply eddy4R terms
 #   Natchaya Pingintha-Durden (2024-08-20)
 #     added a failsafe in case all data at some/all measurement level are missing
+#   Natchaya Pingintha-Durden (2024-11-25)
+#     update the number of missing data from 0 to 35
+#     add tryCatch() for when kmeans cannot be determined
 ####################################################################################################
 def.shft.time.isoCo2 <- function (
   dataList, 
@@ -123,8 +126,8 @@ def.shft.time.isoCo2 <- function (
 	medTmp <- medTmp[complete.cases(medTmp$rtioMoleDryCo2), ]
 	highTmp <- highTmp[complete.cases(highTmp$rtioMoleDryCo2), ]
 
-	# need to stop if some df are missing:
-	if (nrow(lowTmp) == 0 || nrow(medTmp) == 0 || nrow(highTmp) == 0) {
+	# need to stop if some df are missing or have less than 1 minute of available data (~35):
+	if (nrow(lowTmp) <= 35 || nrow(medTmp) <= 35 || nrow(highTmp) <= 35) {
 		return(rpt) # some reference data missing, following steps will fail,
 						 # so just return the input list
 	}
@@ -147,9 +150,39 @@ def.shft.time.isoCo2 <- function (
 
 	#when ofstLow, ofstMed or ofstHigh is NA using  k-mean clustering method determine the index
 	#using k-mean clustering method determine if there is a time offset, and exit if there is not.
-	kmeanLow <- stats::kmeans(lowTmp$rtioMoleDryCo2, centers = 2)
-	kmeanMed <- stats::kmeans(medTmp$rtioMoleDryCo2, centers = 2)
-	kmeanHigh <- stats::kmeans(highTmp$rtioMoleDryCo2, centers = 2)
+	#An error can occur when kmeans is not able to separate the data into 2 clusters. To avoid this error, centers is changed from 2 to 1
+	#List of temporary variables
+	kmeanTmp <- list("kmeanLow", "kmeanMed", "kmeanHigh")
+	
+	#Function for the next job (to handle error and change centers)
+	nextJob <- function(tmpTab, idx) {
+	  cat("Proceeding to the next job with centers = 1...\n")
+	  # Assign result with centers = 1
+	  tmp <- stats::kmeans(tmpTab$rtioMoleDryCo2, centers = 1)
+	  return(tmp)
+	}
+	
+	for (idx in 1:3) {
+	  #Select appropriate tmpTab based on idx
+	  if (idx == 1) tmpTab <- lowTmp
+	  if (idx == 2) tmpTab <- medTmp
+	  if (idx == 3) tmpTab <- highTmp
+	  
+	  #Attempt to perform kmeans clustering
+	  tryCatch({
+	    # Attempt with centers = 2
+	    kmeanTmp[[idx]] <- stats::kmeans(tmpTab$rtioMoleDryCo2, centers = 2)
+	  }, error = function(e) {
+	    #If error occurs, print message and proceed to the next job
+	    cat("Error with centers = 2, changing to centers = 1: ", e$message, "\n")
+	    #Call nextJob to attempt with centers = 1
+	    kmeanTmp[[idx]] <<- nextJob(tmpTab, idx)
+	  })
+	}
+	#Assign kmean to each table
+	kmeanLow <- kmeanTmp[[1]]
+	kmeanMed <- kmeanTmp[[2]]
+	kmeanHigh <- kmeanTmp[[3]]
 
 	#get index when cluster group changed
 	ofstKmeanLow <- which(kmeanLow$cluster != kmeanLow$cluster[1])[1]
@@ -166,6 +199,10 @@ def.shft.time.isoCo2 <- function (
 	if (length(ofstMed) > 1) {ofstMed <- ofstMed[1]}
 	if (length(ofstHigh) > 1) {ofstHigh <- ofstHigh[1]}
 
+	#return rpt (the input list) when any of the three offsets (low, med, high) is NA,
+	#because the step and time offsets below cannot be determined from an NA index
+	if (is.na(ofstLow) || is.na(ofstMed) || is.na(ofstHigh)) {
+	  return(rpt)
+	}
 
 	# get step and time offsets.
 	stepOffsetLow <- hms::as_hms(difftime(as.POSIXct(lowTmp$time[ofstLow], format="%Y-%m-%dT%H:%M:%S", tz="GMT"), 
 
@@ -33,6 +33,8 @@
 #     original creation developed Rich's core work for def.shft.time.isoCo2
 #   Natchaya Pingintha-Durden (2024-08-20)
 #     added a failsafe in case all data at some/all measurement level are missing
+#   Natchaya Pingintha-Durden (2024-09-19)
+#     fixed issues when time correction cannot be determined due to NaN data in stusN2
 ####################################################################################################
 def.shft.time.isoH2o <- function (
   dataList, 
@@ -125,6 +127,9 @@ def.shft.time.isoH2o <- function (
   allData <- do.call(rbind, list(highData, medData, lowData, wrkLvlData))
   allData <- allData[order(allData$time), ]
 
+  #return the input list if data from all stusN2 are missing:
+  if (all(is.na(allData$stusN2))) {return(rpt)}
+  
   ###############################################################################
   #get first index when vaporizer 3-way valve turn on (1)
   idxValvHead <- head(which(allData$valv == 1), n=1)
@@ -138,7 +143,8 @@ def.shft.time.isoH2o <- function (
 
 
   #calculate time difference between valvCrdH2o and vaporizer 3-way valve 
-  if ((idxValvHead == 1 | idxValvCrdH2oHead == 1) & allData$injNum[1] != 1){
+  if (((idxValvHead == 1 | idxValvCrdH2oHead == 1) & allData$injNum[1] != 1) ||
+      length(idxValvHead) == 0 || length(idxValvCrdH2oHead) == 0){
     #assign NA to time difference between valvCrdH2o and vaporizer 3-way valve 
     #if the first injection occurred in previous day and the time difference cannot determine
     timeOfstHead  <- NA
@@ -147,7 +153,8 @@ def.shft.time.isoH2o <- function (
                                             as.POSIXct(allData$time[idxValvHead], format="%Y-%m-%dT%H:%M:%S", tz="GMT")))
       }
 
-  if ((idxValvTail == nrow(allData) | idxValvCrdH2oTail == nrow(allData)) & allData$injNum[nrow(allData)] != 18){
+  if (((idxValvTail == nrow(allData) | idxValvCrdH2oTail == nrow(allData)) & allData$injNum[nrow(allData)] != 18) ||
+      length(idxValvTail) == 0 || length(idxValvCrdH2oTail) == 0){
     #assign NA to time difference between valvCrdH2o and vaporizer 3-way valve 
     #if the last injection (injNum = 18) occurred in next day and the time difference cannot determine
     timeOfstTail  <- NA
@@ -156,6 +163,9 @@ def.shft.time.isoH2o <- function (
                                             as.POSIXct(allData$time[idxValvTail], format="%Y-%m-%dT%H:%M:%S", tz="GMT")))
       }
 
+  #return the input list if both timeOfstHead and timeOfstTail cannot be determined:
+  if (is.na(timeOfstHead) & is.na(timeOfstTail)) {return(rpt)}
+
   #get mean ofset
   timeOfstMean <- as.numeric(mean(c(timeOfstHead, timeOfstTail), na.rm = TRUE))