PhysicalActivityOpenTools
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 6 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎R/ExtractFeatures.R‎
Lines changed: 11 additions & 0 deletions b/‎R/ExtractFeatures.R‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/ReadAndCalibrate.R‎
Lines changed: 11 additions & 3 deletions b/‎R/ReadAndCalibrate.R‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎R/aggregate_per_date.R‎
Lines changed: 28 additions & 21 deletions b/‎R/aggregate_per_date.R‎
Lines changed: 28 additions & 21 deletions
diff --git a/‎R/classify.R‎
Lines changed: 15 additions & 12 deletions b/‎R/classify.R‎
Lines changed: 15 additions & 12 deletions
diff --git a/‎R/classifySleep.R‎
Lines changed: 1 addition & 5 deletions b/‎R/classifySleep.R‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎R/deriveTimestamps.R‎
Lines changed: 11 additions & 9 deletions b/‎R/deriveTimestamps.R‎
Lines changed: 11 additions & 9 deletions
diff --git a/‎R/impute_gaps_epoch_level.R‎
Lines changed: 92 additions & 0 deletions b/‎R/impute_gaps_epoch_level.R‎
Lines changed: 92 additions & 0 deletions
@@ -1,7 +1,7 @@
 Package: actimetric
 Type: Package
 Title: Classifies Accelerometer Data Into Physical Activity Types
-Version: 0.1.4
+Version: 0.1.5
 Authors@R: c(person("Jairo H","Migueles", role = c("aut","cre"),
                     email = "[email protected]",
                     comment = c(ORCID = "0000-0003-0366-6935")),
 
@@ -18,6 +18,7 @@ export(featuresThigh)
 export(featuresTrost2017)
 export(featuresTrost2018)
 export(getBout)
+export(impute_gaps_epoch_level)
 export(inbed)
 export(read.activpal)
 export(runActimetric)
 
@@ -1,3 +1,9 @@
+# actimetric 0.1.5
+
+* Time zone consideration for building the time stamps (new argument: tz). #69
+* Gaps in raw data longer than 90 minutes are now imputed once the data have been aggregated. #69
+* Minor fixes to the warning messages related to sleep detection when using data collected on the hip.
+
 # actimetric 0.1.4
 
 * Time series: 
 
@@ -82,5 +82,16 @@ ExtractFeatures = function(data, classifier = NULL, sf = NULL, epoch = NULL, ID
   }
   # merge basic features with features
   rownames(features) = 1:nrow(features)
+  features = as.data.frame(features)
+  # Lag-lead features if needed
+  if (grepl("lag-lead", classifier, ignore.case = TRUE)) {
+    lagsd1 = c(0, features$vm.sd[1:c(nrow(features) - 1)])
+    lagsd2 = c(0, 0, features$vm.sd[1:c(nrow(features) - 2)])
+    leadsd1 = c(features$vm.sd[2:nrow(features)], 0)
+    leadsd2 = c(features$vm.sd[3:nrow(features)], 0, 0)
+    combsd = apply(cbind(lagsd1, lagsd2, leadsd1, leadsd2), 1, sd)
+    laglead = cbind(lagsd1, lagsd2, leadsd1, leadsd2, combsd)
+    features = as.data.frame(cbind(features, laglead))
+  }
   return(features)
 }
@@ -14,6 +14,12 @@
 #' @param epoch Number with the desired epoch length for the aggregation in seconds.
 #' @param isLastBlock Logical indicating if this is the last chunk of data to be read in the file.
 #' @param S Leftover data from the previous iteration, to be appended to the current chunk of data being read.
+#' @param tz A character string specifying the time zone to be used for the conversion.
+#'   Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
+#'   If not specified, the system's default time zone is used. Time zone handling affects
+#'   how character or numeric inputs are interpreted and displayed.
+#'   A full list of time zone identifiers can be found on
+#'   [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
 #'
 #' @description
 #' Function aimed to read accelerometer raw data. At the moment,
@@ -31,7 +37,7 @@
 ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
                             PreviousEndPage, PreviousLastValue, PreviousLastTime,
                             isLastBlock, do.calibration, iteration, epoch, S,
-                            verbose) {
+                            tz = "", verbose) {
   remaining_epochs = NULL
   # -------------------------------------------------------------------------
   # MODULE 1 - READ CHUNK OF DATA -------------------------------------------
@@ -46,7 +52,8 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
                                   PreviousEndPage = PreviousEndPage,
                                   inspectfileobject = inspectfileobject,
                                   PreviousLastValue = PreviousLastValue,
-                                  PreviousLastTime = PreviousLastTime)
+                                  PreviousLastTime = PreviousLastTime,
+                                  desiredtz = tz)
     # information for next iteration
     blocknumber = blocknumber + 1; count = count + 1
     isLastBlock = accread$isLastBlock
@@ -103,7 +110,7 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
       starttime = GGIR::g.getstarttime(datafile = file, data = data,
                                        mon = inspectfileobject$monc,
                                        dformat = inspectfileobject$dformc,
-                                       desiredtz = "",
+                                       desiredtz = tz,
                                        configtz = NULL)
       trunc_start = !starttime$sec %in% seq(0, 60, by = epoch)
       if (trunc_start == TRUE) {
@@ -144,6 +151,7 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
       }
       data = rbind(S,data)
     }
+    # ----- End of handle time gaps between chunks -----
     # 4 - Store data that  will be added to next block
     LD = nrow(data)
     if (LD >= (3600*sf)) { # if there is more than 1 hour of data...
 
@@ -88,7 +88,6 @@ aggregate_per_date = function(tsDir, epoch, classifier, classes,
       dsnames[ci] = paste("dur", "total", "nighttime", "min", sep = "_")
       ci = ci + 1
     }
-
     # total minutes in classes
     ci2 = ci + length(classes) - 1
     time_in_classes = aggregate(activity ~ date, data = ts, FUN = min_in_class, epoch = epoch)
@@ -105,32 +104,40 @@ aggregate_per_date = function(tsDir, epoch, classifier, classes,
     }
     if ("nighttime.awake" %in% classes) {
       noons = which(ts$time == "12:00:00")
-      start_end_nighttime = find_start_end(ts, column = "activity",
-                                           class = c("nighttime.awake", "nighttime.sleep"))
-      start_end_nighttime_dates = NULL
-      for (ni in 1:length(start_end_nighttime$ends)) {
-        next_noon = which(noons > start_end_nighttime$ends[ni])[1]
-        if (is.na(next_noon)) {
-          # if there is not a next_noon, meaning that recording finished before 12pm
-          # following the last wake up
-          prev_noon = max(which(noons < start_end_nighttime$ends[ni]))
-          start_end_nighttime_dates[ni] = as.character(as.Date(ts$date[noons[prev_noon]]) + 1)
-        } else {
-          start_end_nighttime_dates[ni] = ts$date[noons[next_noon]]
+      if (sum(grepl("nighttime", ts$activity)) > 0) {
+        # if sleep periods have been detected...
+        start_end_nighttime = find_start_end(ts, column = "activity",
+                                             class = c("nighttime.awake", "nighttime.sleep"))
+        start_end_nighttime_dates = NULL
+        for (ni in 1:length(start_end_nighttime$ends)) {
+          next_noon = which(noons > start_end_nighttime$ends[ni])[1]
+          if (is.na(next_noon)) {
+            # if there is not a next_noon, meaning that recording finished before 12pm
+            # following the last wake up
+            prev_noon = max(which(noons < start_end_nighttime$ends[ni]))
+            start_end_nighttime_dates[ni] = as.character(as.Date(ts$date[noons[prev_noon]]) + 1)
+          } else {
+            start_end_nighttime_dates[ni] = ts$date[noons[next_noon]]
+          }
         }
+        # start_end_nighttime_dates = ts$date[start_end_nighttime$ends] # dates based on wakeup
+        rows2fill = which(availableDates %in% start_end_nighttime_dates)
+        ds[rows2fill, ci] = as.character(ts$timestamp[start_end_nighttime$starts])
+        ds[rows2fill, ci + 1] = as.character(ts$timestamp[start_end_nighttime$ends])
+        dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
+        ci = ci + 2
+      } else {
+        # sleep periods have not been detected (e.g., participant removed devices all nights)
+        # only store names to be consistent in columns in the full dataset,
+        # but leave all data as NA
+        dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
+        ci = ci + 2
       }
-      # start_end_nighttime_dates = ts$date[start_end_nighttime$ends] # dates based on wakeup
-      rows2fill = which(availableDates %in% start_end_nighttime_dates)
-      ds[rows2fill, ci] = as.character(ts$timestamp[start_end_nighttime$starts])
-      ds[rows2fill, ci + 1] = as.character(ts$timestamp[start_end_nighttime$ends])
-      dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
-      ci = ci + 2
     }
-
     # bouts of behaviors
     boutdur = sort(boutdur, decreasing = TRUE)
     for (classi in classes) {
-      if (grepl("^nighttime|^nonwear", classes[classi])) break
+      if (grepl("^nighttime|^nonwear", classi)) break
       for (boutduri in 1:length(boutdur)) {
         look4bouts = ifelse(ts$activity == classi, 1, 0)
         # getBout is a copy of GGIR::g.getbout with which we are experimenting
 
@@ -19,6 +19,13 @@
 #' @param starttime Start time for the recording as extracted from \link{ReadAndCalibrate}
 #' @param data Raw data as read by \link{ReadAndCalibrate}
 #' @param parameters List with the definition of the parameters of the function.
+#' @param remaining_epochs Vector of length nrow(data) with information about the epochs that are to be imputed at epoch level.
+#' @param tz A character string specifying the time zone to be used for the conversion.
+#'   Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
+#'   If not specified, the system's default time zone is used. Time zone handling affects
+#'   how character or numeric inputs are interpreted and displayed.
+#'   A full list of time zone identifiers can be found on
+#'   [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
 #'
 #' @return Function does not return anything, it only generates the reports and
 #' visualizations in the \code{output_directory}.
@@ -32,7 +39,8 @@
 #' @author Jairo H. Migueles <[email protected]>
 classify = function(data = NULL, parameters = NULL, sf = NULL,
                     classifier = NULL, infoClassifier = NULL,
-                    ID = NULL, starttime = NULL) {
+                    ID = NULL, starttime = NULL,
+                    remaining_epochs = NULL, tz = "") {
   # -------------------------------------------------------------------------
   # Original code provided by Matthew N. Ahmadi
   # Jairo H. Migueles cleaned the code and isolated the classify function here
@@ -54,21 +62,16 @@ classify = function(data = NULL, parameters = NULL, sf = NULL,
   ts = ExtractFeatures(data, classifier = classifier,
                        epoch = epoch, sf = sf,
                        ID = ID)
-  ts = as.data.frame(ts)
   rm(data); gc()
-  # Lag-lead features if needed
-  if (grepl("lag-lead", classifier, ignore.case = TRUE)) {
-    lagsd1 = c(0, ts$vm.sd[1:c(nrow(ts) - 1)])
-    lagsd2 = c(0, 0, ts$vm.sd[1:c(nrow(ts) - 2)])
-    leadsd1 = c(ts$vm.sd[2:nrow(ts)], 0)
-    leadsd2 = c(ts$vm.sd[3:nrow(ts)], 0, 0)
-    combsd = apply(cbind(lagsd1, lagsd2, leadsd1, leadsd2), 1, sd)
-    laglead = cbind(lagsd1, lagsd2, leadsd1, leadsd2, combsd)
-    ts = as.data.frame(cbind(ts, laglead))
+  # impute long gaps, if any
+  longgaps2fill = which(remaining_epochs > 1)
+  if (length(longgaps2fill) > 0) { # there are periods of the signal to impute
+    # last observation carried forward applied by default
+    ts = impute_gaps_epoch_level(ts, remaining_epochs = remaining_epochs)
   }
   # Timestamp and ID
   if (!is.null(starttime)) {
-    timestamp = deriveTimestamps(from = starttime, length = nrow(ts), epoch = epoch)
+    timestamp = deriveTimestamps(from = starttime, length = nrow(ts), epoch = epoch, tz = tz)
     if (!is.null(ID)) subject = rep(ID, nrow(ts)) else subject = NA
     ts = as.data.frame(cbind(subject, timestamp, ts))
   }
 
@@ -3,8 +3,7 @@
 #' @description
 #' Function to classify nighttime and sleep in the time series.
 #'
-#' @param anglez Angle for the z axis relative to the horizontal plane.
-#' @param starttime Start time as exported from \link{ReadAndCalibrate}
+#' @param anglez Data frame with 3 columns: date, time, and angle for the z axis relative to the horizontal plane.
 #' @param classifier Character (default = NULL) indicating the classifier to be used
 #' (available options are:
 #' Preschool Wrist Random Forest Free Living,
@@ -35,9 +34,6 @@ classifySleep = function(anglez, starttime, classifier, infoClassifier, ts, do.s
   # DETECT SLEEP -----------------------
   # Using variability of angle z as in GGIR.
   if (do.sleep == TRUE) {
-    # derive timestamp for anglez
-    ts_anglez = deriveTimestamps(from = starttime, length = length(anglez), epoch = 5)
-    anglez = data.frame(date = ts_anglez[, 1], time = ts_anglez[, 2], anglez = anglez)
     # get classes information
     ts$sleep_windows_orig = ts$sleep_periods = ts$nighttime = 0
     nighttime_id = length(classes) + 1
 
@@ -4,20 +4,22 @@
 #' @param from Numeric with starting time for timestamps in UTC format.
 #' @param length Numeric with the length of the desired timestamp.
 #' @param epoch Numeric with epoch length in seconds.
+#' @param tz A character string specifying the time zone to be used for the conversion.
+#'   Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
+#'   If not specified, the system's default time zone is used. Time zone handling affects
+#'   how character or numeric inputs are interpreted and displayed.
+#'   A full list of time zone identifiers can be found on
+#'   [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
 #'
 #' @return Timestamp in "%Y-%m-%d %H:%M:%OS" format
 #' @export
 #' @author Jairo H. Migueles <[email protected]>
 #' @author Matthew N. Ahmadi <[email protected]>
 #'
-deriveTimestamps = function(from, length, epoch) {
-  s.t2 = from + epoch*(0:(length - 1))
-  class(s.t2) = c('POSIXt','POSIXct')
-  date = format(s.t2, "%Y-%m-%d")
-  time = format(s.t2, "%H:%M:%OS")
-  # NAs = which(is.na(time))
-  # if (length(NAs) > 0) time[NAs] = "00:00:00.000"
-  # # s.date = format(s.time2, "%Y-%m-%d")
-  # # s.t2 = format(s.time2, "%H:%M:%OS")
+deriveTimestamps = function(from, length, epoch, tz = "") {
+  # Offset in seconds of every epoch relative to `from`. seq_len() is used
+  # instead of 0:(length - 1) because the latter expands to c(0, -1) when
+  # length is 0 and would return two bogus timestamps instead of none.
+  offsets = epoch * (seq_len(length) - 1)
+  s.t2_numeric = from + offsets
+  # Build the time stamps explicitly in the requested time zone so that the
+  # formatted date/time do not depend on the time zone of the session.
+  s.t2 = as.POSIXct(s.t2_numeric, origin = "1970-1-1", tz = tz)
+  date = format(s.t2, "%Y-%m-%d", tz = tz)
+  time = format(s.t2, "%H:%M:%OS", tz = tz)
   return(cbind(date, time))
 }
@@ -0,0 +1,92 @@
+#' Fill Gaps in Data Using Imputation (LOCF or Set-Value)
+#'
+#' This helper function fills gaps in time series or vector data based on the `remaining_epochs` vector.
+#' Gaps can be filled using Last Observation Carried Forward (LOCF) or a user-defined constant.
+#'
+#' @param ... Either a data frame, or one or more named numeric vectors of equal length.
+#' @param remaining_epochs An integer vector of the same length as the input data.
+#'        Each value represents how many times each observation (including the original)
+#'        should appear in the result. Values below 1 are treated as 1, i.e., the
+#'        original observation is always kept. Entries beyond the number of
+#'        observations are ignored.
+#' @param impute_strategy Character string, either `"locf"` (default) or `"set-value"`.
+#'        Determines how gap rows are filled:
+#'        - `"locf"` repeats the last observed value(s).
+#'        - `"set-value"` fills gap rows with the constant provided in `value`.
+#' @param value A single numeric value used to fill gaps when `impute_strategy = "set-value"`.
+#'        Required in that case; ignored for `"locf"`.
+#'
+#' @return A data frame (if multiple columns) or vector (if one column),
+#'         with the appropriate number of rows and gap values filled.
+#'
+#' @details
+#' The function operates on already aggregated (epoch-level) data. Each observation `i`
+#' is replicated `remaining_epochs[i]` times with a single vectorised indexing step;
+#' the first copy is the original observation and the following copies are the gap rows.
+#' With `impute_strategy = "set-value"` every column of the gap rows is overwritten
+#' with `value`.
+#'
+#' @examples
+#' # LOCF with data frame
+#' df = data.frame(x = 1:3, y = c(10, 20, 30))
+#' impute_gaps_epoch_level(df, remaining_epochs = c(1, 3, 2))
+#'
+#' # LOCF with a vector
+#' impute_gaps_epoch_level(c(5, 6, 7), remaining_epochs = c(2, 1, 3))
+#'
+#' # Set-value with a single vector
+#' impute_gaps_epoch_level(c(1, 2), remaining_epochs = c(3, 1),
+#'                       impute_strategy = "set-value", value = 99)
+#'
+#' # Set-value with multiple vectors
+#' impute_gaps_epoch_level(x = c(1, 2), y = c(10, 20), remaining_epochs = c(2, 2),
+#'                       impute_strategy = "set-value", value = 0)
+#' @export
+impute_gaps_epoch_level = function(..., remaining_epochs,
+                                  impute_strategy = "locf",
+                                  value = NULL) {
+  inputs = list(...)
+
+  # Determine data source: data frame or multiple vectors
+  if (length(inputs) == 1 && is.data.frame(inputs[[1]])) {
+    data = inputs[[1]]
+  } else {
+    data = as.data.frame(inputs)
+  }
+
+  stopifnot(impute_strategy %in% c("locf", "set-value"))
+  if (impute_strategy == "set-value" && is.null(value)) {
+    stop("You must provide a 'value' when using impute_strategy = 'set-value'.")
+  }
+
+  # One repetition count per observation; extra trailing entries are ignored
+  n_obs = nrow(data)
+  reps = remaining_epochs[seq_len(n_obs)]
+  if (length(remaining_epochs) < n_obs || anyNA(reps)) {
+    stop("'remaining_epochs' must provide a non-missing value for every observation.")
+  }
+  # The original observation is always kept, even if the count is below 1
+  reps = pmax(reps, 1)
+
+  # Expand all observations at once: observation i appears reps[i] times
+  row_index = rep(seq_len(n_obs), times = reps)
+  result = data[row_index, , drop = FALSE]
+  rownames(result) = NULL
+
+  if (impute_strategy == "set-value") {
+    # Within each run of repeated indices, the first entry is the original
+    # observation and the rest are gap rows to be overwritten with 'value'
+    gap_rows = duplicated(row_index)
+    if (any(gap_rows)) {
+      result[gap_rows, ] = value
+    }
+  }
+
+  if (ncol(result) == 1) {
+    return(result[[1]])
+  } else {
+    return(result)
+  }
+}