Skip to content

Commit b735928

Browse files
authored
Issue69 time zone and gaps (#70)
Fixes #69
2 parents 87a0b7a + a6b56ff commit b735928

17 files changed

+385
-88
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: actimetric
22
Type: Package
33
Title: Classifies Accelerometer Data Into Physical Activity Types
4-
Version: 0.1.4
4+
Version: 0.1.5
55
Authors@R: c(person("Jairo H","Migueles", role = c("aut","cre"),
66
email = "[email protected]",
77
comment = c(ORCID = "0000-0003-0366-6935")),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export(featuresThigh)
1818
export(featuresTrost2017)
1919
export(featuresTrost2018)
2020
export(getBout)
21+
export(impute_gaps_epoch_level)
2122
export(inbed)
2223
export(read.activpal)
2324
export(runActimetric)

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# actimetric 0.1.5
2+
3+
* Time zone consideration for building the time stamps (new argument: tz). #69
4+
* Gaps in raw data longer than 90 minutes are now imputed once the data have been aggregated. #69
5+
* Small fixes to warning messaging related to sleep detection using data collected on hip.
6+
17
# actimetric 0.1.4
28

39
* Time series:

R/ExtractFeatures.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,16 @@ ExtractFeatures = function(data, classifier = NULL, sf = NULL, epoch = NULL, ID
8282
}
8383
# merge basic features with features
8484
rownames(features) = 1:nrow(features)
85+
features = as.data.frame(features)
86+
# Lag-lead features if needed
87+
if (grepl("lag-lead", classifier, ignore.case = TRUE)) {
88+
lagsd1 = c(0, features$vm.sd[1:c(nrow(features) - 1)])
89+
lagsd2 = c(0, 0, features$vm.sd[1:c(nrow(features) - 2)])
90+
leadsd1 = c(features$vm.sd[2:nrow(features)], 0)
91+
leadsd2 = c(features$vm.sd[3:nrow(features)], 0, 0)
92+
combsd = apply(cbind(lagsd1, lagsd2, leadsd1, leadsd2), 1, sd)
93+
laglead = cbind(lagsd1, lagsd2, leadsd1, leadsd2, combsd)
94+
features = as.data.frame(cbind(features, laglead))
95+
}
8596
return(features)
8697
}

R/ReadAndCalibrate.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@
1414
#' @param epoch Number with the desired epoch length for the aggregation in seconds.
1515
#' @param isLastBlock Logical indicating if this is the last chunk of data to be read in the file.
1616
#' @param S Leftover data from the previous iteration, to be appended to the current chunk of data being read.
17+
#' @param tz A character string specifying the time zone to be used for the conversion.
18+
#' Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
19+
#' If not specified, the system's default time zone is used. Time zone handling affects
20+
#' how character or numeric inputs are interpreted and displayed.
21+
#' A full list of time zone identifiers can be found on
22+
#' [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
1723
#'
1824
#' @description
1925
#' Function aimed to read accelerometer raw data. At the moment,
@@ -31,7 +37,7 @@
3137
ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
3238
PreviousEndPage, PreviousLastValue, PreviousLastTime,
3339
isLastBlock, do.calibration, iteration, epoch, S,
34-
verbose) {
40+
tz = "", verbose) {
3541
remaining_epochs = NULL
3642
# -------------------------------------------------------------------------
3743
# MODULE 1 - READ CHUNK OF DATA -------------------------------------------
@@ -46,7 +52,8 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
4652
PreviousEndPage = PreviousEndPage,
4753
inspectfileobject = inspectfileobject,
4854
PreviousLastValue = PreviousLastValue,
49-
PreviousLastTime = PreviousLastTime)
55+
PreviousLastTime = PreviousLastTime,
56+
desiredtz = tz)
5057
# information for next iteration
5158
blocknumber = blocknumber + 1; count = count + 1
5259
isLastBlock = accread$isLastBlock
@@ -103,7 +110,7 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
103110
starttime = GGIR::g.getstarttime(datafile = file, data = data,
104111
mon = inspectfileobject$monc,
105112
dformat = inspectfileobject$dformc,
106-
desiredtz = "",
113+
desiredtz = tz,
107114
configtz = NULL)
108115
trunc_start = !starttime$sec %in% seq(0, 60, by = epoch)
109116
if (trunc_start == TRUE) {
@@ -144,6 +151,7 @@ ReadAndCalibrate = function(file, sf, blocksize, blocknumber, inspectfileobject,
144151
}
145152
data = rbind(S,data)
146153
}
154+
# ----- End of handle time gaps between chunks -----
147155
# 4 - Store data that will be added to next block
148156
LD = nrow(data)
149157
if (LD >= (3600*sf)) { # if there is more than 1 hour of data...

R/aggregate_per_date.R

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ aggregate_per_date = function(tsDir, epoch, classifier, classes,
8888
dsnames[ci] = paste("dur", "total", "nighttime", "min", sep = "_")
8989
ci = ci + 1
9090
}
91-
9291
# total minutes in classes
9392
ci2 = ci + length(classes) - 1
9493
time_in_classes = aggregate(activity ~ date, data = ts, FUN = min_in_class, epoch = epoch)
@@ -105,32 +104,40 @@ aggregate_per_date = function(tsDir, epoch, classifier, classes,
105104
}
106105
if ("nighttime.awake" %in% classes) {
107106
noons = which(ts$time == "12:00:00")
108-
start_end_nighttime = find_start_end(ts, column = "activity",
109-
class = c("nighttime.awake", "nighttime.sleep"))
110-
start_end_nighttime_dates = NULL
111-
for (ni in 1:length(start_end_nighttime$ends)) {
112-
next_noon = which(noons > start_end_nighttime$ends[ni])[1]
113-
if (is.na(next_noon)) {
114-
# if there is not a next_noon, meaning that recording finished before 12pm
115-
# following the last wake up
116-
prev_noon = max(which(noons < start_end_nighttime$ends[ni]))
117-
start_end_nighttime_dates[ni] = as.character(as.Date(ts$date[noons[prev_noon]]) + 1)
118-
} else {
119-
start_end_nighttime_dates[ni] = ts$date[noons[next_noon]]
107+
if (sum(grepl("nighttime", ts$activity)) > 0) {
108+
# if sleep periods have been detected...
109+
start_end_nighttime = find_start_end(ts, column = "activity",
110+
class = c("nighttime.awake", "nighttime.sleep"))
111+
start_end_nighttime_dates = NULL
112+
for (ni in 1:length(start_end_nighttime$ends)) {
113+
next_noon = which(noons > start_end_nighttime$ends[ni])[1]
114+
if (is.na(next_noon)) {
115+
# if there is not a next_noon, meaning that recording finished before 12pm
116+
# following the last wake up
117+
prev_noon = max(which(noons < start_end_nighttime$ends[ni]))
118+
start_end_nighttime_dates[ni] = as.character(as.Date(ts$date[noons[prev_noon]]) + 1)
119+
} else {
120+
start_end_nighttime_dates[ni] = ts$date[noons[next_noon]]
121+
}
120122
}
123+
# start_end_nighttime_dates = ts$date[start_end_nighttime$ends] # dates based on wakeup
124+
rows2fill = which(availableDates %in% start_end_nighttime_dates)
125+
ds[rows2fill, ci] = as.character(ts$timestamp[start_end_nighttime$starts])
126+
ds[rows2fill, ci + 1] = as.character(ts$timestamp[start_end_nighttime$ends])
127+
dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
128+
ci = ci + 2
129+
} else {
130+
# sleep periods have not been detected (e.g., participant removed devices all nights)
131+
# only store names to be consistent in columns in the full dataset,
132+
# but leave all data as NA
133+
dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
134+
ci = ci + 2
121135
}
122-
# start_end_nighttime_dates = ts$date[start_end_nighttime$ends] # dates based on wakeup
123-
rows2fill = which(availableDates %in% start_end_nighttime_dates)
124-
ds[rows2fill, ci] = as.character(ts$timestamp[start_end_nighttime$starts])
125-
ds[rows2fill, ci + 1] = as.character(ts$timestamp[start_end_nighttime$ends])
126-
dsnames[ci:(ci + 1)] = paste("timestamp", c("sleepOnset", "wakeup"), sep = "_")
127-
ci = ci + 2
128136
}
129-
130137
# bouts of behaviors
131138
boutdur = sort(boutdur, decreasing = TRUE)
132139
for (classi in classes) {
133-
if (grepl("^nighttime|^nonwear", classes[classi])) break
140+
if (grepl("^nighttime|^nonwear", classi)) break
134141
for (boutduri in 1:length(boutdur)) {
135142
look4bouts = ifelse(ts$activity == classi, 1, 0)
136143
# getBout is a copy of GGIR::g.getbout with which we are experimenting

R/classify.R

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
#' @param starttime Start time for the recording as extracted from \link{ReadAndCalibrate}
2020
#' @param data Raw data as read by \link{ReadAndCalibrate}
2121
#' @param parameters List with the definition of the parameters of the function.
22+
#' @param remaining_epochs Vector of lenght nrow(data) with information about the epochs that are to be imputed at epoch level.
23+
#' @param tz A character string specifying the time zone to be used for the conversion.
24+
#' Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
25+
#' If not specified, the system's default time zone is used. Time zone handling affects
26+
#' how character or numeric inputs are interpreted and displayed.
27+
#' A full list of time zone identifiers can be found on
28+
#' [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
2229
#'
2330
#' @return Function does not return anything, it only generates the reports and
2431
#' visualizations in the \code{output_directory}.
@@ -32,7 +39,8 @@
3239
#' @author Jairo H. Migueles <[email protected]>
3340
classify = function(data = NULL, parameters = NULL, sf = NULL,
3441
classifier = NULL, infoClassifier = NULL,
35-
ID = NULL, starttime = NULL) {
42+
ID = NULL, starttime = NULL,
43+
remaining_epochs = NULL, tz = "") {
3644
# -------------------------------------------------------------------------
3745
# Original code provided by Matthew N. Ahmadi
3846
# Jairo H. Migueles cleaned the code and isolated the classify function here
@@ -54,21 +62,16 @@ classify = function(data = NULL, parameters = NULL, sf = NULL,
5462
ts = ExtractFeatures(data, classifier = classifier,
5563
epoch = epoch, sf = sf,
5664
ID = ID)
57-
ts = as.data.frame(ts)
5865
rm(data); gc()
59-
# Lag-lead features if needed
60-
if (grepl("lag-lead", classifier, ignore.case = TRUE)) {
61-
lagsd1 = c(0, ts$vm.sd[1:c(nrow(ts) - 1)])
62-
lagsd2 = c(0, 0, ts$vm.sd[1:c(nrow(ts) - 2)])
63-
leadsd1 = c(ts$vm.sd[2:nrow(ts)], 0)
64-
leadsd2 = c(ts$vm.sd[3:nrow(ts)], 0, 0)
65-
combsd = apply(cbind(lagsd1, lagsd2, leadsd1, leadsd2), 1, sd)
66-
laglead = cbind(lagsd1, lagsd2, leadsd1, leadsd2, combsd)
67-
ts = as.data.frame(cbind(ts, laglead))
66+
# impute long gaps, if any
67+
longgaps2fill = which(remaining_epochs > 1)
68+
if (length(longgaps2fill) > 0) { # there are periods of the signal to impute
69+
# last observation carried forward applied by default
70+
ts = impute_gaps_epoch_level(ts, remaining_epochs = remaining_epochs)
6871
}
6972
# Timestamp and ID
7073
if (!is.null(starttime)) {
71-
timestamp = deriveTimestamps(from = starttime, length = nrow(ts), epoch = epoch)
74+
timestamp = deriveTimestamps(from = starttime, length = nrow(ts), epoch = epoch, tz = tz)
7275
if (!is.null(ID)) subject = rep(ID, nrow(ts)) else subject = NA
7376
ts = as.data.frame(cbind(subject, timestamp, ts))
7477
}

R/classifySleep.R

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
#' @description
44
#' Function to classify nighttime and sleep in the time series.
55
#'
6-
#' @param anglez Angle for the z axis relative to the horizontal plane.
7-
#' @param starttime Start time as exported from \link{ReadAndCalibrate}
6+
#' @param anglez Data frame with 3 columns: date, time, and angle for the z axis relative to the horizontal plane.
87
#' @param classifier Character (default = NULL) indicating the classifier to be used
98
#' (available options are:
109
#' Preschool Wrist Random Forest Free Living,
@@ -35,9 +34,6 @@ classifySleep = function(anglez, starttime, classifier, infoClassifier, ts, do.s
3534
# DETECT SLEEP -----------------------
3635
# Using variability of angle z as in GGIR.
3736
if (do.sleep == TRUE) {
38-
# derive timestamp for anglez
39-
ts_anglez = deriveTimestamps(from = starttime, length = length(anglez), epoch = 5)
40-
anglez = data.frame(date = ts_anglez[, 1], time = ts_anglez[, 2], anglez = anglez)
4137
# get classes information
4238
ts$sleep_windows_orig = ts$sleep_periods = ts$nighttime = 0
4339
nighttime_id = length(classes) + 1

R/deriveTimestamps.R

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,22 @@
44
#' @param from Numeric with starting time for timestamps in UTC format.
55
#' @param length Numeric with the length of the desired timestamp.
66
#' @param epoch Numeric with epoch length in seconds.
7+
#' @param tz A character string specifying the time zone to be used for the conversion.
8+
#' Examples include `"UTC"`, `"America/New_York"`, or `"Europe/Berlin"`.
9+
#' If not specified, the system's default time zone is used. Time zone handling affects
10+
#' how character or numeric inputs are interpreted and displayed.
11+
#' A full list of time zone identifiers can be found on
12+
#' [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).
713
#'
814
#' @return Timestamp in "%Y-%m-%d %H:%M:%OS" format
915
#' @export
1016
#' @author Jairo H. Migueles <[email protected]>
1117
#' @author Matthew N. Ahmadi <[email protected]>
1218
#'
13-
deriveTimestamps = function(from, length, epoch) {
14-
s.t2 = from + epoch*(0:(length - 1))
15-
class(s.t2) = c('POSIXt','POSIXct')
16-
date = format(s.t2, "%Y-%m-%d")
17-
time = format(s.t2, "%H:%M:%OS")
18-
# NAs = which(is.na(time))
19-
# if (length(NAs) > 0) time[NAs] = "00:00:00.000"
20-
# # s.date = format(s.time2, "%Y-%m-%d")
21-
# # s.t2 = format(s.time2, "%H:%M:%OS")
19+
deriveTimestamps = function(from, length, epoch, tz = "") {
20+
s.t2_numeric = from + epoch*(0:(length - 1))
21+
s.t2 = as.POSIXct(s.t2_numeric, origin = "1970-1-1", tz = tz)
22+
date = format(s.t2, "%Y-%m-%d", tz = tz)
23+
time = format(s.t2, "%H:%M:%OS", tz = tz)
2224
return(cbind(date, time))
2325
}

R/impute_gaps_epoch_level.R

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#' Fill Gaps in Data Using Imputation (LOCF or Set-Value)
2+
#'
3+
#' This helper function fills gaps in time series or vector data based on the `remaining_epochs` vector.
4+
#' Gaps can be filled using Last Observation Carried Forward (LOCF) or a user-defined constant.
5+
#'
6+
#' @param ... Either a data frame, or one or more named numeric vectors of equal length.
7+
#' @param remaining_epochs An integer vector of the same length as the input data.
8+
#' Each value represents how many times each observation (including the original)
9+
#' should appear in the result.
10+
#' @param impute_strategy Character string, either `"locf"` (default) or `"set-value"`.
11+
#' Determines how gap rows are filled:
12+
#' - `"locf"` repeats the last observed value(s).
13+
#' - `"set-value"` fills gap rows with the constant provided in `value`.
14+
#' @param value A single numeric value used to fill gaps when `impute_strategy = "set-value"`.
15+
#' Required in that case; ignored for `"locf"`.
16+
#'
17+
#' @return A data frame (if multiple columns) or vector (if one column),
18+
#' with the appropriate number of rows and gap values filled.
19+
#'
20+
#' @details
21+
#' This function avoids full memory expansion of raw time series. Instead, it builds the filled
22+
#' result incrementally and supports efficient handling of imputation for gaps defined by
23+
#' `remaining_epochs`. It's especially helpful in constrained environments or with large data.
24+
#'
25+
#' @examples
26+
#' # LOCF with data frame
27+
#' df = data.frame(x = 1:3, y = c(10, 20, 30))
28+
#' impute_gaps_epoch_level(df, remaining_epochs = c(1, 3, 2))
29+
#'
30+
#' # LOCF with a vector
31+
#' impute_gaps_epoch_level(c(5, 6, 7), remaining_epochs = c(2, 1, 3))
32+
#'
33+
#' # Set-value with a single vector
34+
#' impute_gaps_epoch_level(c(1, 2), remaining_epochs = c(3, 1), impute_strategy = "set-value", value = 99)
35+
#'
36+
#' # Set-value with multiple vectors
37+
#' impute_gaps_epoch_level(x = c(1, 2), y = c(10, 20), remaining_epochs = c(2, 2),
38+
#' impute_strategy = "set-value", value = 0)
39+
#' @export
40+
impute_gaps_epoch_level = function(..., remaining_epochs,
41+
impute_strategy = "locf",
42+
value = NULL) {
43+
inputs = list(...)
44+
45+
# Determine data source: data frame or multiple vectors
46+
if (length(inputs) == 1 && is.data.frame(inputs[[1]])) {
47+
data = inputs[[1]]
48+
} else {
49+
data = as.data.frame(inputs)
50+
}
51+
52+
stopifnot(impute_strategy %in% c("locf", "set-value"))
53+
if (impute_strategy == "set-value" && is.null(value)) {
54+
stop("You must provide a 'value' when using impute_strategy = 'set-value'.")
55+
}
56+
57+
total_rows = sum(remaining_epochs)
58+
filled_list = vector("list", total_rows)
59+
index = 1
60+
61+
for (i in seq_len(nrow(data))) {
62+
reps = remaining_epochs[i]
63+
64+
# Always include the original row
65+
filled_list[[index]] = data[i, , drop = FALSE]
66+
index = index + 1
67+
68+
# For additional rows, fill based on strategy
69+
if (reps > 1) {
70+
if (impute_strategy == "locf") {
71+
for (j in 2:reps) {
72+
filled_list[[index]] = data[i, , drop = FALSE]
73+
index = index + 1
74+
}
75+
} else if (impute_strategy == "set-value") {
76+
for (j in 2:reps) {
77+
filled_list[[index]] = as.data.frame(lapply(data[i, , drop = FALSE], function(x) value))
78+
index = index + 1
79+
}
80+
}
81+
}
82+
}
83+
84+
result = do.call(rbind, filled_list)
85+
rownames(result) = NULL
86+
87+
if (ncol(result) == 1) {
88+
return(result[[1]])
89+
} else {
90+
return(result)
91+
}
92+
}

0 commit comments

Comments
 (0)