From f4a133806d95cb963ca10a3725e8fcda55fbfb45 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 8 Sep 2025 12:03:23 -0700 Subject: [PATCH 1/8] renamed ch_sh_get_amax to ch_get_amax --- R/{ch_sh_get_amax.R => ch_get_amax.R} | 110 +++++++++++++------------- 1 file changed, 55 insertions(+), 55 deletions(-) rename R/{ch_sh_get_amax.R => ch_get_amax.R} (96%) diff --git a/R/ch_sh_get_amax.R b/R/ch_get_amax.R similarity index 96% rename from R/ch_sh_get_amax.R rename to R/ch_get_amax.R index 8de2a85..58fd0f1 100644 --- a/R/ch_sh_get_amax.R +++ b/R/ch_get_amax.R @@ -1,56 +1,56 @@ -#' Extracts annual maximum values from ECDE dataframe. -#' -#' @description -#' Extracts annual maximum values, the date of occurrence, the day of year, and the completeness -#' from ECDE dataframe. Uses functions from timeDate ( \code{as.timeDate}, \code{dayOfYear}). -#' -#' @param df A dataframe of daily streamflow data from ECDE -#' -#' @return Returns a dataframe with the following variables -#' \item{year}{} -#' \item{annual maximum}{} -#' \item{date of annual maximum}{} -#' \item{day of year of annual maximum}{} -#' \item{days}{number of days with observations} -#' -#' @export -#' -#' @author Paul Whitfield -#' @seealso \code{\link{ch_read_ECDE_flows}} \code{\link{ch_circ_mean_reg}} -#' @examples -#' data(CAN05AA008) -#' amax <- ch_sh_get_amax(CAN05AA008) -#' str(amax) - - -ch_sh_get_amax <- function(df) { - - data <- df$Flow - Date <- df$Date - year <- format(Date, "%Y") - Year <- as.numeric(unique(year)) - maxdate <- array(NA, dim = length(Year)) - doy <- array(NA, dim = length(Year)) - days <- array(NA, dim = length(Year)) - class(maxdate) <- "Date" - year <- as.factor(year) - - amax <- as.numeric(tapply(data,year,max)) - - dataframe <- data.frame(df,year) - - for (k in 1:length(Year)) { - ndata <- dataframe[dataframe$year == Year[k],] - days[k] <- length(ndata$Flow) - - ndata <- ndata[ndata$Flow == amax[k],] - maxdate[k] <- ndata[1, 3] - maxdate_a <- timeDate::as.timeDate(maxdate[k]) - doy[k] <- timeDate::dayOfYear(maxdate_a) - } - - result <- data.frame(Year,amax, maxdate, doy, days) - - return(result) - +#' Extracts annual maximum values from ECDE dataframe. +#' +#' @description +#' Extracts annual maximum values, the date of occurrence, the day of year, and the completeness +#' from ECDE dataframe. Uses functions from timeDate ( \code{as.timeDate}, \code{dayOfYear}). +#' +#' @param df A dataframe of daily streamflow data from ECDE +#' +#' @return Returns a dataframe with the following variables +#' \item{year}{} +#' \item{annual maximum}{} +#' \item{date of annual maximum}{} +#' \item{day of year of annual maximum}{} +#' \item{days}{number of days with observations} +#' +#' @export +#' +#' @author Paul Whitfield +#' @seealso \code{\link{ch_read_ECDE_flows}} \code{\link{ch_circ_mean_reg}} +#' @examples +#' data(CAN05AA008) +#' amax <- ch_sh_get_amax(CAN05AA008) +#' str(amax) + + +ch_sh_get_amax <- function(df) { + + data <- df$Flow + Date <- df$Date + year <- format(Date, "%Y") + Year <- as.numeric(unique(year)) + maxdate <- array(NA, dim = length(Year)) + doy <- array(NA, dim = length(Year)) + days <- array(NA, dim = length(Year)) + class(maxdate) <- "Date" + year <- as.factor(year) + + amax <- as.numeric(tapply(data,year,max)) + + dataframe <- data.frame(df,year) + + for (k in 1:length(Year)) { + ndata <- dataframe[dataframe$year == Year[k],] + days[k] <- length(ndata$Flow) + + ndata <- ndata[ndata$Flow == amax[k],] + maxdate[k] <- ndata[1, 3] + maxdate_a <- timeDate::as.timeDate(maxdate[k]) + doy[k] <- timeDate::dayOfYear(maxdate_a) + } + + result <- data.frame(Year,amax, maxdate, doy, days) + + return(result) + } \ No newline at end of file From 79718a6f9026db8ae44bf8d4bb2e143042779cd6 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Tue, 9 Sep 2025 09:12:48 -0700 Subject: [PATCH 2/8] change ch_get_amax from ch_sh_get_amax --- NAMESPACE | 2 +- R/ch_circ_mean_reg.R | 4 ++-- R/ch_ffa_screen_plot.R | 2 +- R/ch_get_ECDE_metadata.R | 8 +++++--- R/ch_get_amax.R | 10 ++++++++-- R/ch_get_peaks.R | 8 ++++++-- R/ch_polar_plot_peaks.R | 2 +- man/ch_circ_mean_reg.Rd | 4 ++-- man/ch_ffa_screen_plot.Rd | 2 +- man/{ch_sh_get_amax.Rd => ch_get_amax.Rd} | 10 +++++----- man/ch_polar_plot_peaks.Rd | 2 +- 11 files changed, 33 insertions(+), 21 deletions(-) rename man/{ch_sh_get_amax.Rd => ch_get_amax.Rd} (83%) diff --git a/NAMESPACE b/NAMESPACE index 9c34859..987a031 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(ch_flow_raster) export(ch_flow_raster_qa) export(ch_flow_raster_trend) export(ch_get_ECDE_metadata) +export(ch_get_amax) export(ch_get_peaks) export(ch_get_url_data) export(ch_get_wscstation) @@ -50,7 +51,6 @@ export(ch_rfa_distseason) export(ch_rfa_extractamax) export(ch_rfa_julianplot) export(ch_rfa_seasonstat) -export(ch_sh_get_amax) export(ch_slice) export(ch_stack_EC) export(ch_sub_set_Years) diff --git a/R/ch_circ_mean_reg.R b/R/ch_circ_mean_reg.R index 105b93f..f211cfb 100644 --- a/R/ch_circ_mean_reg.R +++ b/R/ch_circ_mean_reg.R @@ -22,10 +22,10 @@ #' #' @import circular #' @export -#' @seealso \code{\link{ch_sh_get_amax}} +#' @seealso \code{\link{ch_get_amax}} #' @examples #' data(CAN05AA008) -#' am <- ch_sh_get_amax(CAN05AA008) +#' am <- ch_get_amax(CAN05AA008) #' m_r <- ch_circ_mean_reg(am) ch_circ_mean_reg <- function(dataframe){ diff --git a/R/ch_ffa_screen_plot.R b/R/ch_ffa_screen_plot.R index 8322de5..ac9d7d8 100644 --- a/R/ch_ffa_screen_plot.R +++ b/R/ch_ffa_screen_plot.R @@ -37,7 +37,7 @@ #'@examples \donttest{ #'# Not tested automatically as can be very slow to execute #' data(CAN05AA008) -#' amax <- ch_sh_get_amax(CAN05AA008) +#' amax <- ch_get_amax(CAN05AA008) #' ch_ffa_screen_plot(amax)} ch_ffa_screen_plot <- function(df, stn= "unspecified", mtitle = "", diff --git a/R/ch_get_ECDE_metadata.R b/R/ch_get_ECDE_metadata.R index a256266..a9163da 100644 --- a/R/ch_get_ECDE_metadata.R +++ b/R/ch_get_ECDE_metadata.R @@ -22,6 +22,7 @@ #' \item{Latitude}{} #' \item{Longitude}{} #' \item{DrainageArea}{km\eqn{^2}{^2}} +#' \item{Eff_DrainageArea}{ km effective drainage area} #' \item{Years}{Number of years with data} #' \item{From}{Start Year} #' \item{To}{End Year} @@ -56,13 +57,14 @@ ch_get_ECDE_metadata <- function(filename, writefile=NULL){ stop("ECDE file not found") } - meta <- read.table(filename, skip = 96, sep = " ", na.strings = -999) + meta <- read.table(filename, skip = 103, sep = " ", na.strings = -999) names(meta) <- c("Station", "Fav", "StationName", "HydStatus", "Prov", "Latitude", - "Longitude", "DrainageArea", "Years", "From", "To", "Reg.", + "Longitude", "DrainageArea", "EFF_DrainageArea", + "Years", "From", "To", "Reg.", "Flow", "Level", "Sed", "OperSched", "RealTime", "RHBN", "Region", "Datum", "Operator") - meta <- meta[,c(1, 3:21)] + meta <- meta[,c(1, 3:22)] if (!is.null(writefile)) write.csv(meta, writefile, row.names = FALSE) diff --git a/R/ch_get_amax.R b/R/ch_get_amax.R index 58fd0f1..eb1ca79 100644 --- a/R/ch_get_amax.R +++ b/R/ch_get_amax.R @@ -12,6 +12,7 @@ #' \item{date of annual maximum}{} #' \item{day of year of annual maximum}{} #' \item{days}{number of days with observations} +#' \item{SYM} {WSC SYM code} #' #' @export #' @@ -19,14 +20,18 @@ #' @seealso \code{\link{ch_read_ECDE_flows}} \code{\link{ch_circ_mean_reg}} #' @examples #' data(CAN05AA008) -#' amax <- ch_sh_get_amax(CAN05AA008) +#' amax <- ch_get_amax(CAN05AA008) #' str(amax) -ch_sh_get_amax <- function(df) { +ch_get_amax <- function(df) { data <- df$Flow Date <- df$Date + + df$SYM[df$SYM == " "] <- "" #added + df$SYM[df$SYM == " "] <- "" #added + year <- format(Date, "%Y") Year <- as.numeric(unique(year)) maxdate <- array(NA, dim = length(Year)) @@ -44,6 +49,7 @@ ch_sh_get_amax <- function(df) { days[k] <- length(ndata$Flow) ndata <- ndata[ndata$Flow == amax[k],] + SYM[k] <- ndata[1, 5] ### added maxdate[k] <- ndata[1, 3] maxdate_a <- timeDate::as.timeDate(maxdate[k]) doy[k] <- timeDate::dayOfYear(maxdate_a) diff --git a/R/ch_get_peaks.R b/R/ch_get_peaks.R index 022dedc..88f7b46 100644 --- a/R/ch_get_peaks.R +++ b/R/ch_get_peaks.R @@ -27,6 +27,7 @@ #' \item{max}{maximum discharge during event} #' \item{volume}{flow volume during the event} #' \item{duration}{length of the event in days} +#' \item{SYM}{WSC SYM code - data flag} #' The \code{case} list contains the flows during an event and also for four preceding and subsequent days. Each event will have #' a length between nine to n days in length. Note: in rare cases where the event is in progress when data becomes available the @@ -72,6 +73,7 @@ ch_get_peaks <- function(dataframe, threshold) { class(max_date) <-"Date" case <-list(dim=3) + SYM <- array(NA, dim = 3) ### added for (i in 1:length(data)) { @@ -103,7 +105,7 @@ ch_get_peaks <- function(dataframe, threshold) { max[index]=data[k] max_date[index] <-Date[k] - + SYM[index] <- inSYM[k] ## added volume[index] <- 0.0 duration[index] <-0 flag <- 1 @@ -114,6 +116,7 @@ ch_get_peaks <- function(dataframe, threshold) { { if (data[k] > max[index]) {max[index] <- data[k] max_date[index] <-Date[k] + SYM[index] <- inSYM[k] ## added } volume[index] <- volume[index] + data[k] @@ -134,7 +137,8 @@ ch_get_peaks <- function(dataframe, threshold) { max_date <- as.Date(max_date, format="%Y-%m-%d") - POT_events <- data.frame(st_date, max_date, max, volume, duration) + POT_events <- data.frame(st_date, max_date, max, volume, + duration, SYM) ## note change to add SYM diff --git a/R/ch_polar_plot_peaks.R b/R/ch_polar_plot_peaks.R index 9a2500c..b1c5ef5 100644 --- a/R/ch_polar_plot_peaks.R +++ b/R/ch_polar_plot_peaks.R @@ -52,7 +52,7 @@ #' #' # plot of annual maximum series #' data(CAN05AA008) -#' am <- ch_sh_get_amax(CAN05AA008) +#' am <- ch_get_amax(CAN05AA008) #' ch_polar_plot_peaks(days = am$doy, title = "05AA008") #' #' #remove partial years diff --git a/man/ch_circ_mean_reg.Rd b/man/ch_circ_mean_reg.Rd index a54c4af..6d3aa63 100644 --- a/man/ch_circ_mean_reg.Rd +++ b/man/ch_circ_mean_reg.Rd @@ -23,7 +23,7 @@ Calculate the circular mean, median, and regularity using a year of 365 days. } \examples{ data(CAN05AA008) -am <- ch_sh_get_amax(CAN05AA008) +am <- ch_get_amax(CAN05AA008) m_r <- ch_circ_mean_reg(am) } \references{ @@ -35,5 +35,5 @@ Pewsey, A., M. Neuhauser, and G. D. Ruxton. 2014. Circular Statistics in R, from climate change. } \seealso{ -\code{\link{ch_sh_get_amax}} +\code{\link{ch_get_amax}} } diff --git a/man/ch_ffa_screen_plot.Rd b/man/ch_ffa_screen_plot.Rd index d038a46..69c8394 100644 --- a/man/ch_ffa_screen_plot.Rd +++ b/man/ch_ffa_screen_plot.Rd @@ -51,7 +51,7 @@ occurred. \donttest{ # Not tested automatically as can be very slow to execute data(CAN05AA008) -amax <- ch_sh_get_amax(CAN05AA008) +amax <- ch_get_amax(CAN05AA008) ch_ffa_screen_plot(amax)} } \references{ diff --git a/man/ch_sh_get_amax.Rd b/man/ch_get_amax.Rd similarity index 83% rename from man/ch_sh_get_amax.Rd rename to man/ch_get_amax.Rd index ec37930..080c22d 100644 --- a/man/ch_sh_get_amax.Rd +++ b/man/ch_get_amax.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ch_sh_get_amax.R -\name{ch_sh_get_amax} -\alias{ch_sh_get_amax} +% Please edit documentation in R/ch_get_amax.R +\name{ch_get_amax} +\alias{ch_get_amax} \title{Extracts annual maximum values from ECDE dataframe.} \usage{ -ch_sh_get_amax(df) +ch_get_amax(df) } \arguments{ \item{df}{A dataframe of daily streamflow data from ECDE} @@ -23,7 +23,7 @@ from ECDE dataframe. Uses functions from timeDate ( \code{as.timeDate}, \code{da } \examples{ data(CAN05AA008) -amax <- ch_sh_get_amax(CAN05AA008) +amax <- ch_get_amax(CAN05AA008) str(amax) } \seealso{ diff --git a/man/ch_polar_plot_peaks.Rd b/man/ch_polar_plot_peaks.Rd index a717593..8f3ec93 100644 --- a/man/ch_polar_plot_peaks.Rd +++ b/man/ch_polar_plot_peaks.Rd @@ -87,7 +87,7 @@ ch_polar_plot_peaks(shading = TRUE) # plot of annual maximum series data(CAN05AA008) -am <- ch_sh_get_amax(CAN05AA008) +am <- ch_get_amax(CAN05AA008) ch_polar_plot_peaks(days = am$doy, title = "05AA008") #remove partial years From 87535cdae1e167e88613fc17109c8df1aee65889 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 15 Sep 2025 09:34:47 -0700 Subject: [PATCH 3/8] code was removing one too many lines resulting in a missing December 31st. --- R/ch_read_ECDE_flows.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ch_read_ECDE_flows.R b/R/ch_read_ECDE_flows.R index b2a59a8..f32edb5 100644 --- a/R/ch_read_ECDE_flows.R +++ b/R/ch_read_ECDE_flows.R @@ -46,7 +46,7 @@ ch_read_ECDE_flows <- function(filename) { mdata <- read.csv(filename, header = FALSE, skip = 1) names(mdata) <- c("ID", "PARAM", "Date", "Flow", "SYM") mdata$Date <- as.Date(mdata$Date, format = "%Y/%m/%d") - cut <- length(mdata[, 1]) - 3 + cut <- length(mdata[, 1]) - 2 mdata <- mdata[1:cut, ] if (names(mdata)[4] != "Flow") From 9727b7980e5b46619b8e1e01cdd52f84ff081324 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 15 Sep 2025 10:08:09 -0700 Subject: [PATCH 4/8] new function that determines timing outliers using functions originally from Sau & Rodrigues 2018 --- R/SR_hstat.R | 343 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 R/SR_hstat.R diff --git a/R/SR_hstat.R b/R/SR_hstat.R new file mode 100644 index 0000000..d087752 --- /dev/null +++ b/R/SR_hstat.R @@ -0,0 +1,343 @@ +#' SR_hstat +#' +#' @description These are updated versions of the functions described in +#' Sau & Rodrigues (2018). Daniela Rodriguez kindly shared their original code. +#' The main modifications here were that the data tables are not read in, but included +#' as variables and some language was changed to English from Spanish. +#' +#' This function is basically a wrapper for the series of functions written +#' by Sau & Rogrigues, and these revisions were made by Paul Whitfield. +#' Originally separate functions, they were nested with their parent. +#' +#' Therefore there are three main functions and the execution code within. +#' All the original comments were preserved subsequent to conversion +#' to English letter equivalents; these are in comment lines starting with ##. +#' +#' @param x An array of positions on the unit circle in radians +#' @param alpha a confidence limit. +#' +#' @return a list containing +#' \describe{ +#' \item (mu) (circular number of mean directions) +#' \item (ventana) (circular number of window) +#' \item (muini) (circular number initial value of mu ) +#' \item (conc) (estimate of the concentration parameter defined in Ko (1992).) +#' \item (malpha) (1 - alpha) +#' \item (distance) (array of distance to be compared to corte) +#' \item (corte) (cut value) +#' \item (isout) (sequential indexes of outlier wherein 0 is an outlier and a 1 is not) +#' \item (porcout) (proportion of outliers in sample) +#' } +#' @references +#' Sau, M.F., and D. Rodriguez. 2018. "Minimum distance method for +#' directional data and outlier detection." Advances in Data Analysis +#' and Classification 12:587-603. +#' +#' Ko D (1992) Robust estimation of the concentration parameter of the von Mises-Fisher +#' distribution. Ann Stat 20:917–928 +#' +#' Whitfield, P. H. and D. H. Burn (2025 in review). "Screening Annual Maxima and +#' Peaks-Over-Threshold Series for Flood Frequency Analysis." + +#' +#' @import CircStats +#' @import circular +#' @import movMF +#' +#' @author Mercedes Fernandez Sau, Daniela Rodrigues, Paul Whitfield +#' +#' @examples +#' data1 <- circular::rwrappedcauchy(100, mu=circular(0), rho=0.7, control.circular=list(units="degrees")) +# alpha <- 0.05 +#' result <- SR_hstat(data1, alpha) +#' result$isout # lists the outliers with 0 being an outlier and 1 not. + +require(CircStats) +require(circular) +require(movMF) +######################### Text from original set of functions +#FUNCTIONS +##M?nima distancia para un h m?nimo +#Minimum distance for an h min +# S&R was originally a series of separate functions, these are now +# nested into three main functions +################################################ +# the following is the original example code. This is now the example, +# and the main actions of the function. +# +## Example +## the data +##data1 <- rwrappedcauchy(100, mu=circular(0), rho=0.7, control.circular=list(units="degrees")) +## alpha<-0.05 +## malpha<-1-alpha + +## MDEVM <- mindisthminconc(data1) # minimum distance estimator under von mises distribution +## estmu <- MDEVM$mu +## estconc <- MDEVM$conc +## detection <- vmd2deteccion(data1,estmu) +##distance <- detection$norma +## corte <- cortando(estconc,malpha) +## isout<- 1*(distance <= corte) +## isout # 0 is an outlier +## porcout<-1-mean(isout) +## porcout # outliers proportions in de sample + +# so there are only three funtions that are called +# mindisthminconc(data1) +# vmd2deteccion(data1,estmu) +# cortando(estconc,malpha) + +# all the others get called only within middisthminconc +# now nested within three main functions + +SR_hstat <- function(x, alpha = 0.05){ + + ############################################### + ##estimador de m?nima distancia + #estimator of minimum distance + ############################################### + + ################################################## minimum distance function + + mindisthminconc <- function(x) + { + + ini <- inicialesrob2(x) + + conc <- ini$conc + muini <- ini$mu + options(warn = -1) ### added to suppress coercion warning + ini <- c(muini,1) + options(warn = 0) + + + ## optim is a general purpose optimization (stats) + res <- optim(ini, method = "L-BFGS-B", + intargumento, + kk = conc, + xm = x, + lower = c(0,0.001), + upper = c(2*pi,1.41)) + + resultado <- list(mu=res$par[1], ventana=res$par[2], + muini=muini, conc=conc) + return(resultado) + } + + ############################################ nucepan + ##nucleo de epanechnikov + # core + # called in nucleo + nucepan <- function(x) + { + + a <- 1.5*(1-x*x) + kepan <- a*(abs(x) < 1) + kepan + } + + ############################################ nucleo + ##me da el Kh + # called in suavizado + nucleo <- function(punto, xt, ventana) ### nucleo = core punto = spot ventana = window + { + + puntobis <- cbind(cos(punto), sin(punto)) + xtbis <- cbind(cos(xt), sin(xt)) + prodinter <- as.vector((xtbis) %*% t(puntobis)) + arg <- (1 - prodinter)/(ventana * ventana) + nucleovec <- nucepan(arg) + nucleovec + } + ############################################# suavizado + ## suavizado para epanechnikov (fsombrero) + # smoothed for Epanechikov[kernel] fhat + # called in argumento + suavizado <- function(punto, x, ven) ## suavizado = smoothed + { + ## funcion de la ventana + # window function + + intk <- 12 / 5 + lambda <- intk * sqrt(2) + cven <- 1 / (lambda * ven) + wei <- nucleo(punto, x, ven) + suav <- cven * mean(wei, na.rm = T) + suav + } + + ############################################## argumento + # called in intargumento + argumento <- function(punto, cc, x, conc) ## argumento = argument + { + mu <- cc[1] + ven <- cc[2] + + arg <- (suavizado(punto, x, ven) - dvm(punto, mu, conc)) ## dvm(CircStats) return the von Mises at a particular value + + argum <- arg * arg + + argum + } + + + + ############################################## intargumento + # called in mindisthminconc + intargumento <- function(cc, xm, kk) + { + mm <- cc[1] + hh <- cc[2] + grillas <- seq(0, 2*pi, length=1000) ## grillas = grids + resul <- grillas + for(j in 1:1000) + { + resul[j] <- argumento(grillas[j], cc, xm, kk) + } + res <- mean(resul) + res + } + + ################################################ inicialesrob2 + # called in mindisthminconc + inicialesrob2 <- function(muestra) ## muestra = sample + { + + muini <- median(muestra) + muini <- ((muini<2*pi) * (muini>0) * muini) + + ((2*pi + muini) * (muini<0)) + ((muini - 2 * pi) * (muini>2 * pi)) + mediana <- cbind(cos(muini), sin(muini)) + muestrat <- cbind(cos(muestra), sin(muestra)) + prodinter <- median(as.vector((mediana) %*% t(muestrat))) + + conc <- c2menos(prodinter) ## c2menos is a S&R function + resultado <- list(mu = muini, conc = conc) + + return(resultado) + } + + # original code read files for V1 and V2 + ##trabajokofunc2menos <- read.table("~/My Dropbox/daniela/trabajosfuturos/mindistesf/simulacion/circulo/trabajokofunc2menos.txt", quote="\"") + ##trabajokofunc2menos <- read.table("trabajokofunc2menos.txt", quote="\"") + + V1 <- c(0.010, 0.020, 0.030, 0.040, 0.050, 0.060, 0.070, 0.080, 0.090, 0.100, 0.110, + 0.120, 0.130, 0.140, 0.150, 0.160, 0.170, 0.180, 0.190, 0.200, 0.210, 0.220, + 0.230, 0.240, 0.250, 0.260, 0.270, 0.280, 0.290, 0.300, 0.310, 0.320, 0.330, + 0.340, 0.350, 0.360, 0.370, 0.380, 0.390, 0.400, 0.410, 0.420, 0.430, 0.440, + 0.450, 0.460, 0.470, 0.480, 0.490, 0.500, 0.510, 0.520, 0.530, 0.540, 0.550, + 0.560, 0.570, 0.580, 0.590, 0.600, 0.610, 0.620, 0.630, 0.640, 0.650, 0.660, + 0.670, 0.680, 0.690, 0.700, 0.710, 0.720, 0.730, 0.740, 0.750, 0.760, 0.770, + 0.780, 0.790, 0.800, 0.810, 0.820, 0.830, 0.840, 0.850, 0.860, 0.870, 0.880, + 0.890, 0.900, 0.905, 0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, + 0.950, 0.955, 0.960, 0.965, 0.970, 0.975, 0.980, 0.985, 0.990, 0.991, 0.992, + 0.993, 0.994, 0.995, 0.996, 0.997) + + V2 <- c(0.0100, 0.0200, 0.0300, 0.0400, 0.0501, 0.0601, 0.0702, 0.0803, + 0.0904, 0.1006, 0.1107, 0.1210, 0.1312, 0.1415, 0.1519, 0.1623, + 0.1728, 0.1833, 0.1939, 0.2046, 0.2153, 0.2262, 0.2371, 0.2481, + 0.2592, 0.2703, 0.2816, 0.2930, 0.3046, 0.3162, 0.3280, 0.3399, + 0.3519, 0.3641, 0.3765, 0.3890, 0.4017, 0.4146, 0.4277, 0.4410, + 0.4545, 0.4683, 0.4822, 0.4964, 0.5109, 0.5257, 0.5408, 0.5562, + 0.5719, 0.5879, 0.6043, 0.6212, 0.6384, 0.6561, 0.6742, 0.6929, + 0.7120, 0.7318, 0.7521, 0.7731, 0.7948, 0.8173, 0.8405, 0.8647, + 0.8897, 0.9158, 0.9430, 0.9713, 1.0010, 1.0321, 1.0648, 1.0992, + 1.1356, 1.1741, 1.2149, 1.2585, 1.3050, 1.3550, 1.4088, 1.4671, + 1.5306, 1.6001, 1.6768, 1.7619, 1.8573, 1.9651, 2.0884, 2.2311, + 2.3990, 2.5998, 2.7160, 2.8452, 2.9897, 3.1525, 3.3374, 3.5492, + 3.7944, 4.0812, 4.4212, 4.8304, 5.3317, 5.9597, 6.7686, 7.8485, + 9.3619, 11.6337, 15.4221, 23.0017, 25.5287, 28.6874, 32.7489, 38.1643, + 45.7461, 57.1190, 76.0742) + + trabajokofunc2menos <- data.frame(V1, V2) + + ################################################ c2menos + # only called in inicialesrob2 + c2menos <- function(tt) + { + + dimtr <- dim(trabajokofunc2menos)[1] + dimtr1 <- dimtr-1 + aa <- rep(0,dimtr) + aa[1] <- trabajokofunc2menos[1,2] * (tt <= trabajokofunc2menos[1,1]) + aa[dimtr] <- trabajokofunc2menos[dimtr,2] * (tt > trabajokofunc2menos[dimtr,1]) + for(i in 2:dimtr1) + { + aa[i] <- trabajokofunc2menos[i,2] * (tt > trabajokofunc2menos[(i-1),1]) * (tt <= trabajokofunc2menos[i,1]) + } + sum(aa) + + } + + ####################################### + + ################################################## end of main function mindisthmin + ################################################## minimum distance function end + + + ################################################### main function vmd2deteccion + ##Minima distancia para una Von Mises Fisher en dim 2 con k prefijados + + vmd2deteccion <- function(datos, mu) ## datos = data + { + puntobis <- cbind(cos(mu), sin(mu)) + xtbis <- cbind(cos(datos), sin(datos)) + distancias <- as.vector((xtbis) %*% t(puntobis)) + norma <- 2 - 2 * distancias + resultado <- list(norma = norma) + return(resultado) + } + ################################################### main function vmd2deteccion ends + + ################################################### main function cortando + cortando <- function(kk, poda) ##cortando = cutting poda = pruning + { + ################################################ + # called in cortando with integrate + intfun <- function(t,k) + { + exp(k * t) * (1 - t^2) ^ (-1/2) + } + + intfun2 <- function(aa,kk,poda) ## aa is from c2menos + { + cte <- integrate(intfun, -1, 1, k=kk)$value + integrate(intfun, -1, aa, k=kk)$value - (cte * (1 - poda)) + } + y1mpoda <- uniroot(intfun2, lower = -0.9999, upper = 1, kk=kk, poda=poda)$root + # one dimensional root (zero) finding search from lower to upper + 2 - 2 * y1mpoda + + } # intfun was moved into cortando + + ################################################### main function cortand ends + ############################################## + # main actions of function + malpha <- 1-alpha + + + + MDEVM <- mindisthminconc(x) # minimum distance estimator under von mises distribution + estmu <- MDEVM$mu + estconc <- MDEVM$conc + + + detection <- vmd2deteccion(x,estmu) + distance <- detection$norma + + corte <- cortando(estconc,malpha) + + isout<- 1*(distance <= corte) + ## isout # 0 is an outlier + + porcout<-1-mean(isout) + ## porcout # outliers proportions in de sample + + + result <- list (MDEVM$mu, MDEVM$ventana, MDEVM$muini, MDEVM$conc, + malpha, distance, corte, isout, porcout) + names(result) <- c("mu", "ventana", "muini", "conc", + "malpha", "distance", "corte", "isout", "porcout") + + return (result) + +} From 190458a84d3cc05f3853ab2dc87c82bcd1247f79 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 15 Sep 2025 10:12:46 -0700 Subject: [PATCH 5/8] minor text edits --- R/SR_hstat.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/R/SR_hstat.R b/R/SR_hstat.R index d087752..03a5dd3 100644 --- a/R/SR_hstat.R +++ b/R/SR_hstat.R @@ -1,12 +1,12 @@ #' SR_hstat #' #' @description These are updated versions of the functions described in -#' Sau & Rodrigues (2018). Daniela Rodriguez kindly shared their original code. +#' Sau & Rodriguez (2018). Daniela Rodriguez kindly shared their original code. #' The main modifications here were that the data tables are not read in, but included #' as variables and some language was changed to English from Spanish. #' #' This function is basically a wrapper for the series of functions written -#' by Sau & Rogrigues, and these revisions were made by Paul Whitfield. +#' by Sau & Rodriguez, and these revisions were made by Paul Whitfield. #' Originally separate functions, they were nested with their parent. #' #' Therefore there are three main functions and the execution code within. @@ -52,9 +52,7 @@ #' result <- SR_hstat(data1, alpha) #' result$isout # lists the outliers with 0 being an outlier and 1 not. -require(CircStats) -require(circular) -require(movMF) + ######################### Text from original set of functions #FUNCTIONS ##M?nima distancia para un h m?nimo @@ -62,7 +60,7 @@ require(movMF) # S&R was originally a series of separate functions, these are now # nested into three main functions ################################################ -# the following is the original example code. This is now the example, +# the following is the original example code. This is now the functions example, # and the main actions of the function. # ## Example @@ -75,14 +73,14 @@ require(movMF) ## estmu <- MDEVM$mu ## estconc <- MDEVM$conc ## detection <- vmd2deteccion(data1,estmu) -##distance <- detection$norma +## distance <- detection$norma ## corte <- cortando(estconc,malpha) ## isout<- 1*(distance <= corte) ## isout # 0 is an outlier ## porcout<-1-mean(isout) ## porcout # outliers proportions in de sample -# so there are only three funtions that are called +# so there are only three functions that are called # mindisthminconc(data1) # vmd2deteccion(data1,estmu) # cortando(estconc,malpha) @@ -216,7 +214,7 @@ SR_hstat <- function(x, alpha = 0.05){ return(resultado) } - # original code read files for V1 and V2 + # original code read files for V1 and V2 here defined internally ##trabajokofunc2menos <- read.table("~/My Dropbox/daniela/trabajosfuturos/mindistesf/simulacion/circulo/trabajokofunc2menos.txt", quote="\"") ##trabajokofunc2menos <- read.table("trabajokofunc2menos.txt", quote="\"") From 30c58f86630a20d84a5bbabb3b50d248e51881c9 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 15 Sep 2025 10:41:07 -0700 Subject: [PATCH 6/8] added @export to roxygen --- R/SR_hstat.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/SR_hstat.R b/R/SR_hstat.R index 03a5dd3..ad1c968 100644 --- a/R/SR_hstat.R +++ b/R/SR_hstat.R @@ -44,7 +44,8 @@ #' @import circular #' @import movMF #' -#' @author Mercedes Fernandez Sau, Daniela Rodrigues, Paul Whitfield +#' @author original code Mercedes Fernandez Sau, Daniela Rodriguez, modified Paul Whitfield +#' @export #' #' @examples #' data1 <- circular::rwrappedcauchy(100, mu=circular(0), rho=0.7, control.circular=list(units="degrees")) From 219d33b2c19d1c32698d6a5bae1b41c22654db3b Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Mon, 15 Sep 2025 11:02:15 -0700 Subject: [PATCH 7/8] new function based on POT plot.method to plot Pareto with informative symbols --- R/ch_plot_POT.R | 117 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 R/ch_plot_POT.R diff --git a/R/ch_plot_POT.R b/R/ch_plot_POT.R new file mode 100644 index 0000000..519e968 --- /dev/null +++ b/R/ch_plot_POT.R @@ -0,0 +1,117 @@ +#' ch_plot_POT +#' +#' @description +#' A plot of peaks-over-threshold derived from a plot method from the package POT. +#' +#' A single object matching ‘retlev.uvpot’ was found. It was found in the following places +#' registered S3 method for retlev from namespace POT +#' namespace:POT +#' with value +#' +#' The original function only plotted generic symbols for the observations. +#' It was modified to add informative symbols and colours for FFA screening. These +#' are indicated with a "+" below. +#' The plots are shown in Whitfield and Burn (2025b) +#' +#' @param object output from POT function fitgpd +#' @param npy number of events per year = #events/#years +#' @param main plot title +#' @param xlab xaxis label +#' @param ylab y axis label +#' @param xlimsup upper limit for Pareto +#' @param mon array of months of observations [1-12] + +#' @param ccol array of circular colours [12] + +#' @param mcex size of plot symbols [4] + +#' @param mpch symbols for points [4] + +#' @param mcode array of indexes for symbol plotting [1-4] + +#' @param sr_pch symbols for overlay [NA, 8] + +#' @param mcol array of colours for outline of points [3] + +#' @param isout array of indexes for timing outliers [1-2] + +#' @param ci confidence interval = TRUE +#' @param points plot points = TRUE +#' @param ... +#' +#' @return returns the pot function invisibly and generates a plot +#' +#' @export +#' +#' @reference +#' Whitfield, P. H. and D. H. Burn (2025*). "Screening Annual Maxima and +#' Peaks-Over-Threshold Series for Flood Frequency Analysis." +#' +#' @author modified by Paul Whitfield original from POT +#' +#' @examples +#' \dontrun{} +#' \donttest{} +#' + +ch_plot_POT <- function (object, npy, main, xlab, ylab, xlimsup, + mon, ccol, mcex, mpch, mcode, sr_pch, mcol, isout, + ci = TRUE, + points = TRUE, ...) +{ +### changes to call to include symbols and codes +### line starting with mon... to isoout + + par(las = 1) + + if (!inherits(object, "uvpot")) + stop("Use only with 'uvpot' objects") + if (object$var.thresh) + stop("Return Level plot is available only for constant threshold !") + data <- object$exceed + loc <- object$threshold[1] + scale <- object$param["scale"] + shape <- object$param["shape"] + n <- object$nat + pot.fun <- function(T) { + p <- rp2prob(T, npy)[, "prob"] + return(qgpd(p, loc, scale, shape)) + } + eps <- 10^(-3) + if (!is.null(object$noy)) + npy <- n/object$noy + else if (missing(npy)) { + warning("Argument ``npy'' is missing. Setting it to 1.") + npy <- 1 + } + + if (missing(main)) + main <- "Return Level Plot" + if (missing(xlab)) + xlab <- "Return Period (yr) [Pareto]" + if (missing(ylab)) + ylab <- expression( "Peak Flow ("*m^3*s^{-1}*")" ) + if (missing(xlimsup)) + xlimsup <- prob2rp((n - 0.35)/n, npy)[, "retper"] + ylims <- c(min(data),max(data)) + + plot(pot.fun, from = 1/npy + eps, to = xlimsup, log = "x", + xlab = xlab, ylab = ylab, ylim = ylims, main = main, ...) + if (points) { ### add fancy points + + p_emp <- (1:n - 0.35)/n +################################ add symbols and colours +mytest <- 1/(npy * (1 - p_emp)) + +points(1/(npy * (1 - p_emp)), sort(data), pch = mpch[mcode], + bg = ccol[mon], col = mcol[mcode]) + +points(1/(npy * (1 - p_emp)), sort(data), pch = sr_pch[isout], cex = 1.3, + col = mcol[4]) +######################################################## + } + if (ci) { ### add confidence limts + p_emp <- (1:n - 0.35)/n + samp <- rgpd(1000 * n, loc, scale, shape) + samp <- matrix(samp, n, 1000) + samp <- apply(samp, 2, sort) + samp <- apply(samp, 1, sort) + ci_inf <- samp[25, ] + ci_sup <- samp[975, ] + lines(1/(npy * (1 - p_emp)), ci_inf, lty = 2 , col = "gray30") + lines(1/(npy * (1 - p_emp)), ci_sup, lty = 2, col = "gray30") + } + invisible(pot.fun) +} From 25f8caa2c0abd0a5456ee66af623aaea3c18df86 Mon Sep 17 00:00:00 2001 From: Paul Whitfield Date: Tue, 7 Oct 2025 09:58:18 -0700 Subject: [PATCH 8/8] minor fixes --- R/ch_get_amax.R | 6 +++--- R/ch_get_peaks.R | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/ch_get_amax.R b/R/ch_get_amax.R index eb1ca79..c6421dc 100644 --- a/R/ch_get_amax.R +++ b/R/ch_get_amax.R @@ -29,8 +29,7 @@ ch_get_amax <- function(df) { data <- df$Flow Date <- df$Date - df$SYM[df$SYM == " "] <- "" #added - df$SYM[df$SYM == " "] <- "" #added + year <- format(Date, "%Y") Year <- as.numeric(unique(year)) @@ -43,6 +42,7 @@ ch_get_amax <- function(df) { amax <- as.numeric(tapply(data,year,max)) dataframe <- data.frame(df,year) + SYM <- array(length(Year)) for (k in 1:length(Year)) { ndata <- dataframe[dataframe$year == Year[k],] @@ -55,7 +55,7 @@ ch_get_amax <- function(df) { doy[k] <- timeDate::dayOfYear(maxdate_a) } - result <- data.frame(Year,amax, maxdate, doy, days) + result <- data.frame(Year,amax, maxdate, doy, days, SYM) return(result) diff --git a/R/ch_get_peaks.R b/R/ch_get_peaks.R index 88f7b46..d7c94db 100644 --- a/R/ch_get_peaks.R +++ b/R/ch_get_peaks.R @@ -63,6 +63,7 @@ ch_get_peaks <- function(dataframe, threshold) { } data <- dataframe$Flow Date <- dataframe$Date + inSYM <- dataframe$SYM event <-array(0, dim=length(data)) event_num <-array(0, dim=length(data))