@@ -1147,3 +1147,167 @@ function(directory,
11471147 seed = as_nullable_integer ))
11481148 do.call(keras $ preprocessing $ text_dataset_from_directory , args )
11491149}
1150+
1151+
1152+ # ' Creates a dataset of sliding windows over a timeseries provided as array
1153+ # '
1154+ # ' @details
1155+ # ' This function takes in a sequence of data-points gathered at
1156+ # ' equal intervals, along with time series parameters such as
1157+ # ' length of the sequences/windows, spacing between two sequence/windows, etc.,
1158+ # ' to produce batches of timeseries inputs and targets.
1159+ # '
1160+ # ' @section Example 1:
1161+ # '
1162+ # ' Consider indices `0:99`. With `sequence_length=10`, `sampling_rate=2`,
1163+ # ' `sequence_stride=3`, `shuffle=FALSE`, the dataset will yield batches of
1164+ # ' sequences composed of the following indices:
1165+ # '
1166+ # ' ```
1167+ # ' First sequence: 0 2 4 6 8 10 12 14 16 18
1168+ # ' Second sequence: 3 5 7 9 11 13 15 17 19 21
1169+ # ' Third sequence: 6 8 10 12 14 16 18 20 22 24
1170+ # ' ...
1171+ # ' Last sequence: 78 80 82 84 86 88 90 92 94 96
1172+ # ' ```
1173+ # '
1174+ # ' In this case the last 3 data points are discarded since no full sequence
1175+ # ' can be generated to include them (the next sequence would have started
1176+ # ' at index 81, and thus its last step would have gone over 99).
1177+ # '
1178+ # ' @section Example 2: Temporal regression.
1179+ # '
1180+ # ' Consider an array `data` of scalar values, of shape `(steps)`.
1181+ # ' To generate a dataset that uses the past 10
1182+ # ' timesteps to predict the next timestep, you would use:
1183+ # '
1184+ # ' ``` R
1185+ # ' steps <- 100
1186+ # ' # data is integer seq with some noise
1187+ # ' data <- array(1:steps + abs(rnorm(steps, sd = .25)))
1188+ # ' inputs_data <- head(data, -10) # drop last 10
1189+ # ' targets <- tail(data, -10) # drop first 10
1190+ # ' dataset <- timeseries_dataset_from_array(
1191+ # ' inputs_data, targets, sequence_length=10)
1192+ # ' library(tfdatasets)
1193+ # ' dataset_iterator <- as_iterator(dataset)
1194+ # ' repeat {
1195+ # ' batch <- iter_next(dataset_iterator)
1196+ # ' if(is.null(batch)) break
1197+ # ' c(input, target) %<-% batch
1198+ # ' stopifnot(exprs = {
1199+ # ' # First sequence: steps [1-10]
1200+ # ' # Corresponding target: step 11
1201+ # ' all.equal(as.array(input[1, ]), data[1:10])
1202+ # ' all.equal(as.array(target[1]), data[11])
1203+ # '
1204+ # ' all.equal(as.array(input[2, ]), data[2:11])
1205+ # ' all.equal(as.array(target[2]), data[12])
1206+ # '
1207+ # ' all.equal(as.array(input[3, ]), data[3:12])
1208+ # ' all.equal(as.array(target[3]), data[13])
1209+ # ' })
1210+ # ' }
1211+ # ' ```
1212+ # '
1213+ # ' @section Example 3: Temporal regression for many-to-many architectures.
1214+ # '
1215+ # ' Consider two arrays of scalar values `X` and `Y`,
1216+ # ' both of shape `(100)`. The resulting dataset should consist of samples with
1217+ # ' 20 timestamps each. The samples should not overlap.
1218+ # ' To generate a dataset that uses the current timestamp
1219+ # ' to predict the corresponding target timestep, you would use:
1220+ # '
1221+ # ' ``` R
1222+ # ' X <- seq(100)
1223+ # ' Y <- X*2
1224+ # '
1225+ # ' sample_length <- 20
1226+ # ' input_dataset <- timeseries_dataset_from_array(
1227+ # ' X, NULL, sequence_length=sample_length, sequence_stride=sample_length)
1228+ # ' target_dataset <- timeseries_dataset_from_array(
1229+ # ' Y, NULL, sequence_length=sample_length, sequence_stride=sample_length)
1230+ # '
1231+ # ' library(tfdatasets)
1232+ # ' dataset_iterator <-
1233+ # ' zip_datasets(input_dataset, target_dataset) %>%
1234+ # ' as_array_iterator()
1235+ # ' while(!is.null(batch <- iter_next(dataset_iterator))) {
1236+ # ' c(inputs, targets) %<-% batch
1237+ # ' stopifnot(
1238+ # ' all.equal(inputs[1,], X[1:sample_length]),
1239+ # ' all.equal(targets[1,], Y[1:sample_length]),
1240+ # ' # second sample equals output timestamps 20-40
1241+ # ' all.equal(inputs[2,], X[(1:sample_length) + sample_length]),
1242+ # ' all.equal(targets[2,], Y[(1:sample_length) + sample_length])
1243+ # ' )
1244+ # ' }
1245+ # ' ```
1246+ # '
1247+ # ' @param data array or eager tensor
1248+ # ' containing consecutive data points (timesteps).
1249+ # ' The first axis is expected to be the time dimension.
1250+ # '
1251+ # ' @param targets Targets corresponding to timesteps in `data`.
1252+ # ' `targets[i]` should be the target
1253+ # ' corresponding to the window that starts at index `i`
1254+ # ' (see example 2 below).
1255+ # ' Pass `NULL` if you don't have target data (in this case the dataset will
1256+ # ' only yield the input data).
1257+ # '
1258+ # ' @param sequence_length Length of the output sequences (in number of timesteps).
1259+ # '
1260+ # ' @param sequence_stride Period between successive output sequences.
1261+ # ' For stride `s`, output samples would
1262+ # ' start at index `data[i]`, `data[i + s]`, `data[i + (2 * s)]`, etc.
1263+ # '
1264+ # ' @param sampling_rate Period between successive individual timesteps
1265+ # ' within sequences. For rate `r`, timesteps
1266+ # ' `data[i], data[i + r], ..., data[i + (sequence_length - 1) * r]`
1267+ # ' are used to create a sample sequence.
1268+ # '
1269+ # ' @param batch_size Number of timeseries samples in each batch
1270+ # ' (except maybe the last one).
1271+ # '
1272+ # ' @param shuffle Whether to shuffle output samples,
1273+ # ' or instead draw them in chronological order.
1274+ # '
1275+ # ' @param seed Optional int; random seed for shuffling.
1276+ # '
1277+ # ' @param start_index Optional int; data points earlier (exclusive)
1278+ # ' than `start_index` will not be used
1279+ # ' in the output sequences. This is useful to reserve part of the
1280+ # ' data for test or validation.
1281+ # '
1282+ # ' @param end_index Optional int; data points later (exclusive) than `end_index`
1283+ # ' will not be used in the output sequences.
1284+ # ' This is useful to reserve part of the data for test or validation.
1285+ # '
1286+ # ' @param ... For backwards and forwards compatibility, ignored presently.
1287+ # '
1288+ # ' @seealso
1289+ # ' + <https://www.tensorflow.org/api_docs/python/tf/keras/utils/timeseries_dataset_from_array>
1290+ # '
1291+ # ' @returns A `tf.data.Dataset` instance. If `targets` was passed, the
1292+ # ' dataset yields batches of two items: `(batch_of_sequences,
1293+ # ' batch_of_targets)`. If not, the dataset yields only
1294+ # ' `batch_of_sequences`.
1295+ # '
1296+ # ' @export
timeseries_dataset_from_array <-
function(data, targets, sequence_length, sequence_stride = 1L,
         sampling_rate = 1L, batch_size = 128L, shuffle = FALSE, ...,
         seed = NULL, start_index = NULL, end_index = NULL)
{
  # Thin wrapper: validate the TensorFlow version, coerce the integer-like
  # arguments, then forward everything to the Python implementation.

  # Version gate. The version and function-name literals must not carry
  # surrounding whitespace: " 2.6" is not a valid version specification.
  # NOTE(review): presumably signals an error when the installed TensorFlow
  # is older than 2.6 -- confirm against require_tf_version()'s definition.
  require_tf_version("2.6", "timeseries_dataset_from_array")

  # NOTE(review): capture_args() is defined elsewhere; it presumably collects
  # the arguments of this call and applies the listed coercion function to
  # each same-named argument -- confirm against its definition.
  # R numerics are doubles by default, so the count/index arguments are
  # converted to integers here; the `as_nullable_integer` entries are the ones
  # whose default is NULL.
  args <- capture_args(match.call(), list(
    sequence_length = as.integer,
    sequence_stride = as.integer,
    sampling_rate   = as.integer,
    batch_size      = as.integer,
    seed            = as_nullable_integer,
    start_index     = as_nullable_integer,
    end_index       = as_nullable_integer
  ))

  # Call the Python function with the prepared argument list.
  do.call(keras$preprocessing$timeseries_dataset_from_array, args)
}
0 commit comments