|
| 1 | +#' Temporal aggregation of daily or hourly data. |
| 2 | +#' |
| 3 | +#' Temporal aggregation of data with different possible aggregation functions.#' |
| 4 | +#' |
| 5 | +#' @param data Matrix with the data to be aggregated (time steps in rows, stations in columns). |
| 6 | +#' @param dates Vector of dates (POSIX class) of the original data. |
| 7 | +#' @param agg Level of temporal aggregation: daily ("D", only for hourly input data), monthly ("M"), seasonal ("S"), annual ("Y"). |
| 8 | +#' @param aggFun Function for temporal aggregation. Possible values: mean, median, min, max. Default: mean. |
| 9 | +#' @param input.mch logical. If the input data is from DWH, the hourly needs to be aggregated differently. Default: FALSE |
| 10 | +#' @return A list with aggregated data and dates. |
| 11 | +#' @details The result is a multi-year series. The input series is first filled with NA to have full years, i.e. the result will have NAs for the season/month with no data at all. If a season/month has data but is not complete, a warning is shown. |
| 12 | +#' @export |
| 13 | +#' @examples \dontrun{ |
| 14 | +#' # Generate some data |
| 15 | +#' dates.day <- seq(as.POSIXct(as.Date("20160101", format='%Y%m%d'), tz="UTC"), |
| 16 | +#' as.POSIXct(as.Date("20171231", format='%Y%m%d'), tz="UTC"), by="day") |
| 17 | +#' dates.hour <-seq(as.POSIXlt("201601010000", format='%Y%m%d%H%M'), |
| 18 | +#' as.POSIXlt("201712312350", format='%Y%m%d%H%M'), by="hour") |
| 19 | +#' data.day <- array(runif(length(dates.day)*2, -5, 30), dim=c(length(dates.day),2)) |
| 20 | +#' data.hour <- rnorm(length(dates.hour), mean=15, sd=2) |
| 21 | +#' # Aggregate from daily to annual |
| 22 | +#' data.agg(data.day, dates.day, agg="Y", aggFun="mean") |
| 23 | +#' # Aggregate from daily to monthly |
| 24 | +#' data.agg(data.day, dates.day, agg="M", aggFun="max") |
| 25 | +#' # Aggregate from daily to seasonal |
| 26 | +#' data.agg(data.day, dates.day, agg="S", aggFun="min") |
| 27 | +#' # Aggregate from hourly to daily |
| 28 | +#' data.agg(data.hour, dates.hour, agg="D", aggFun="mean") |
| 29 | +#' } |
| 30 | + |
| 31 | + |
| 32 | +data.agg <- function(data, dates, agg=NULL, aggFun="mean", input.mch=FALSE){ |
| 33 | + |
| 34 | + |
| 35 | + data <- as.matrix(data) # to avoid problems with 1 station (vector) |
| 36 | + if(!is.null(colnames(data))) stn.name <- colnames(data) |
| 37 | + |
| 38 | + # Check length of plot data and dates |
| 39 | + assertthat::assert_that(nrow(data)==length(dates), msg="Dates and data length do not match") |
| 40 | + |
| 41 | + # Filter by years first |
| 42 | + years <- as.numeric(unique(format(dates, '%Y'))) |
| 43 | + ind.year <- lapply(years, function(x) which(as.numeric(format(dates,'%Y')) %in% x)) |
| 44 | + |
| 45 | + # New aggregated dates |
| 46 | + if(agg=="D"){ |
| 47 | + dates.agg.full <- unique(format(dates, '%Y-%m-%d')) |
| 48 | + if(input.mch & format(dates[length(dates)], '%H')=="00" & as.numeric(format(dates[length(dates)], '%Y'))>=2018){ |
| 49 | + dates.agg <- unique(format(dates[1:(length(dates)-1)], '%Y-%m-%d'))# -1 because the last time step (00:00) is used in the aggregation of the last full day |
| 50 | + print("The aggregation is done from 01 of day D to 00 of day D+1") |
| 51 | + }else{ |
| 52 | + dates.agg <- unique(format(dates, '%Y-%m-%d')) |
| 53 | + print("The aggregation is done from 00 to 23 of day D") |
| 54 | + } |
| 55 | + } |
| 56 | + if(agg=="Y") dates.agg <- unique(format(dates, '%Y')) |
| 57 | + if(agg=="M") dates.agg <- as.character(seq(as.Date(dates[1]), as.Date(format(dates[length(dates)], format= "%Y-12-31")), by="month")) |
| 58 | + if(agg=="S") dates.agg <- as.character(seq(as.Date(dates[1]), as.Date(format(dates[length(dates)], format= "%Y-12-31")), by="quarter")) |
| 59 | + |
| 60 | + # Aggregate to daily (only from hourly) |
| 61 | + if(agg=="D"){ |
| 62 | + data.agg <- matrix(NA, nrow=length(dates.agg), ncol=ncol(data)) |
| 63 | + for (i in 1:(length(dates.agg))){ |
| 64 | + # In DWH hours to days are aggregated differently before and after 2018!! |
| 65 | + if(as.numeric(format(dates[length(dates)], '%Y'))>=2018){ |
| 66 | + ind.day.d <- which(format(dates,"%Y-%m-%d")==dates.agg.full[i] & as.numeric(format(dates,"%H"))>=1) |
| 67 | + ind.day.next <- which(format(dates,"%Y-%m-%d")==dates.agg.full[i+1] & as.numeric(format(dates,"%H"))==0) # The aggregation in the DWH for a day D is from 01:00 os day D to 00:00 of day D+1, therefore +1 timestep |
| 68 | + ind.day <- c(ind.day.d, ind.day.next) |
| 69 | + |
| 70 | + }else{ |
| 71 | + ind.day <- which(format(dates,"%Y-%m-%d")==dates.agg[i]) # it would take positions from data from 00 to 23 od day D |
| 72 | + } |
| 73 | + #if(sum(ind.day>length(dates))) {remove <- which(ind.day>length(dates)); ind.day <- ind.day[-remove]} |
| 74 | + |
| 75 | + for(j in 1:ncol(data)){ # if all data is NA, write NA (otherwise it writes Inf) |
| 76 | + if(sum(is.na(data[ind.day,j]))==length(ind.day)){ |
| 77 | + data.agg[i,j] <- NA |
| 78 | + } else{ |
| 79 | + data.agg[i,j] <- apply(as.matrix(data[ind.day,j]), 2, aggFun, na.rm=T) |
| 80 | + } |
| 81 | + } |
| 82 | + # warning if a day is not complete |
| 83 | + if(length(ind.day)!=0 & 24!= length(ind.day)) print(paste0("Warning: the input series for day ",dates.agg[i]," is not complete. The aggregation is calculated with the available data")) |
| 84 | + if(length(ind.day)==0) print(paste0("Warning: the input series for day ",dates.agg[i]," is empty.")) |
| 85 | + # close loop over days in the period |
| 86 | + } |
| 87 | + |
| 88 | + # when all values in a day are NA, the result is -Inf. Change to NA |
| 89 | + data.agg <- apply(data.agg,2,function(x) replace(x, is.infinite(x),NA)) |
| 90 | + } |
| 91 | + |
| 92 | + # Aggregate to monthly |
| 93 | + if(agg=="M"){ |
| 94 | + mon <- c(1:12) # it attemps to fill the whole month and year aggregated series |
| 95 | + ind.mon <- lapply(mon, function(x) which(as.numeric(format(dates,'%m')) %in% x)) |
| 96 | + i <- 1 |
| 97 | + data.agg <- matrix(NA, nrow=length(years)*length(mon), ncol=ncol(data)) |
| 98 | + for (y in 1:length(years)){ |
| 99 | + for(m in 1:length(mon)){ |
| 100 | + ind.dates <- intersect(ind.year[[y]], ind.mon[[m]]) |
| 101 | + |
| 102 | + # if ind.dates not empty, calculate aggregationn and save dates of the first time step (because the whole month may not be included) |
| 103 | + if(length(ind.dates)!=0) { |
| 104 | + #data.agg[i,] <- apply(as.matrix(data[ind.dates,]), 2, aggFun, na.rm=T) |
| 105 | + for(j in 1:ncol(data)){ # if all data is NA, write NA (otherwise it writes Inf) |
| 106 | + if(sum(is.na(data[ind.dates,j]))==length(ind.dates)){ |
| 107 | + data.agg[i,j] <- NA |
| 108 | + } else{ |
| 109 | + data.agg[i,j] <- apply(as.matrix(data[ind.dates,j]), 2, aggFun, na.rm=T) |
| 110 | + } |
| 111 | + } |
| 112 | + } else{ |
| 113 | + print(paste0("Warning: the input series for month ",mon[m]," and year ", years[y]," is empty.")) |
| 114 | + } |
| 115 | + i <- i +1 |
| 116 | + |
| 117 | + # warning if a month is not complete |
| 118 | + ndays <- lubridate::days_in_month(mon[m]) |
| 119 | + if(length(ind.dates)!=0 & ndays!= length(ind.dates)) print(paste0("Warning: the input series for month ",mon[m]," and year ", years[y]," is not complete. The aggregation is calculated with the available data")) |
| 120 | + |
| 121 | + } |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + # Aggregate to seasonal |
| 126 | + if(agg=="S"){ |
| 127 | + seas <- matrix(c(12,1:11), nrow=4, ncol=3, byrow=TRUE) # DJF, MAM, JJA, SON in rows |
| 128 | + ind.seas <- list() |
| 129 | + for(s in 1:nrow(seas)){ |
| 130 | + aux <- lapply(seas[s,], function(x) which(as.numeric(format(dates,'%m')) %in% x)) |
| 131 | + ind.seas[[s]] <- aux |
| 132 | + } |
| 133 | + i <- 1 |
| 134 | + |
| 135 | + data.agg <- matrix(NA, nrow=length(years)*nrow(seas), ncol=ncol(data)) |
| 136 | + for (y in 1:length(years)){ |
| 137 | + for(s in 1:nrow(seas)){ |
| 138 | + if(s==1){ # winter, take December from previous year |
| 139 | + if(y==1){ # first year no December |
| 140 | + ind.dates <- c(intersect(ind.year[[y]], sort(unlist(ind.seas[[s]][[2]]))), |
| 141 | + intersect(ind.year[[y]], sort(unlist(ind.seas[[s]][[3]])))) |
| 142 | + } else{ |
| 143 | + ind.dates <- c(intersect(ind.year[[y-1]], sort(unlist(ind.seas[[s]][[1]]))), |
| 144 | + intersect(ind.year[[y]], sort(unlist(ind.seas[[s]][[2]]))), |
| 145 | + intersect(ind.year[[y]], sort(unlist(ind.seas[[s]][[3]])))) |
| 146 | + } |
| 147 | + } else{ |
| 148 | + ind.dates <- intersect(ind.year[[y]], sort(unlist(ind.seas[[s]]))) |
| 149 | + } |
| 150 | + |
| 151 | + if(length(ind.dates)!=0) { # if not empty, save data and dates of the first time step (because the whole season may not be included) |
| 152 | + for(j in 1:ncol(data)){ # if all data is NA, write NA (otherwise it writes Inf) |
| 153 | + if(sum(is.na(data[ind.dates,j]))==length(ind.dates)){ |
| 154 | + data.agg[i,j] <- NA |
| 155 | + } else{ |
| 156 | + data.agg[i,j] <- apply(as.matrix(data[ind.dates,j]), 2, aggFun, na.rm=T) |
| 157 | + } |
| 158 | + } |
| 159 | + |
| 160 | + } else{ |
| 161 | + print(paste0("Warning: the input series for season ",seas[s,1],", ",seas[s,2],", ", seas[s,3]," and year ", years[y]," is empty.")) |
| 162 | + } |
| 163 | + |
| 164 | + # warning if a month is not complete |
| 165 | + ndays <- lubridate::days_in_month(seas[s,1]) + lubridate::days_in_month(seas[s,2]) + lubridate::days_in_month(seas[s,3]) |
| 166 | + if(length(ind.dates)!=0 & ndays!= length(ind.dates)) print(paste0("Warning: the input series for season ",seas[s,1],", ",seas[s,2],", ", seas[s,3]," and year ", years[y]," is not complete. The aggregation is calculated with the available data")) |
| 167 | + |
| 168 | + i <- i +1 |
| 169 | + } |
| 170 | + } |
| 171 | + } |
| 172 | + |
| 173 | + # Aggregate to annual |
| 174 | + if(agg=="Y"){ |
| 175 | + data.agg <- matrix(NA, nrow=length(dates.agg), ncol=ncol(data)) |
| 176 | + for (y in 1:length(years)){ |
| 177 | + ind.dates <- ind.year[[y]] |
| 178 | + #data.agg[y,] <- apply(as.matrix(data[ind.dates,]), 2, aggFun, na.rm=T) |
| 179 | + for(j in 1:ncol(data)){ # if all data is NA, write NA (otherwise it writes Inf) |
| 180 | + if(sum(is.na(data[ind.dates,j]))==length(ind.dates)){ |
| 181 | + data.agg[y,j] <- NA |
| 182 | + } else{ |
| 183 | + data.agg[y,j] <- apply(as.matrix(data[ind.dates,j]), 2, aggFun, na.rm=T) |
| 184 | + } |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | + df <- data.frame(data.agg) |
| 189 | + if(!is.null(colnames(data))) colnames(df) <- stn.name |
| 190 | + res <- list(data.agg=df, dates.agg=dates.agg, agg= agg, aggFun=aggFun) |
| 191 | + return(res) |
| 192 | + |
| 193 | +} # end data.agg |
0 commit comments