Skip to content

New splitting regimen #143

@WetRobot

Description

@WetRobot

The new splitting could be implemented using data.table's rolling join, something like this:

library("data.table")


## Toggle for the row filter applied before the split further down.
drop <- FALSE

## Toy follow-up data: two subjects with two consecutive rows each. Every row
## has a start time, a duration and a status.
dt <- data.table(
  id      = rep(c(1, 2), each = 2),
  id_row  = rep(c(1, 2), times = 2),
  t_start = c(0, 2, 1, 2),
  dur     = c(2, 1, 1, 2),
  status  = c("alive", "dead", "alive", "sick")
)

## End time of each row, then sort and key the table by subject and end time.
dt[, t_stop := t_start + dur]
setkey(dt, id, t_stop)

## Status on the previous and on the next row of the same subject; NA where
## there is no such row.
dt[
  ,
  c("status_lag1", "status_lead1") := list(
    shift(status, n = 1, type = "lag", fill = NA),
    shift(status, n = 1, type = "lead", fill = NA)
  ),
  by = "id"
]


## Follow-up limits per subject: t_start of the subject's first row and
## t_stop of the subject's last row, in the current row order of dt.
lim <- with(dt, {
  is_first <- !duplicated(id)
  is_last <- !duplicated(id, fromLast = TRUE)
  data.table(
    id = id[is_first],
    t_start = t_start[is_first],
    t_stop = t_stop[is_last]
  )
})

## Break points at which follow-up is split, in ascending order.
breaks <- sort(c(0, 2.5, 3.5, 10))
## Smallest and largest break; breaks is sorted, so these are its two ends.
breaks_range <- c(breaks[1L], breaks[length(breaks)])

## For every id, collect the break points lying within that id's limits in
## lim (bounds included), one output row per break. The break column is named
## t_start so that it can be used as a join column against dt.
join <- lim[
  ,
  .(t_start = breaks[between(breaks, t_start, t_stop)]),
  by = "id"
]

## Rows of dt taking part in the split. By default every row is kept.
keep_rows <- TRUE
if (drop) {
  ## With drop, keep only rows lying within the range of the breaks. The
  ## bounds are inclusive so that a row starting exactly at the first break
  ## (or ending exactly at the last one) is not discarded.
  keep_rows <- dt[, t_start >= breaks_range[1L] & t_stop <= breaks_range[2L]]
}

## Rolling join of the per-id break points (join) onto the data rows:
## - roll = +Inf: a break without an exact t_start match is matched to the
##   row of the same id with the latest preceding t_start,
## - nomatch = 0L: breaks that match no row are left out of the result.
## In the result t_start is the break point and t_stop is the end of the
## matched row.
split <- dt[keep_rows, .(id, id_row, t_start, t_stop, status)][
  i = join,
  on = c("id", "t_start"),
  j = .(id, id_row, t_start, t_stop, status),
  roll = +Inf,
  nomatch = 0L
][
  ## The roll does not check that the break lies before the end of the row
  ## it was matched to. A break falling into a gap between rows, or exactly
  ## on the end of the last row, would otherwise produce an interval of
  ## negative or zero length.
  t_start < t_stop
]
print(split)


Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions