gshift cannot eval variables used in [ (#5548)

ben-schwen · Benjamin Schwendinger · MichaelChirico · web-flow · commit e665d2a89c3c · 2024-01-05T20:01:02.000+08:00
* add fix for escaping gforce using [variables in function call

* push

* escape gshift

* update

* add fix

* add test for coverage

* add news

* update shift tests

* move tests

* update tests

* simplify tests

* simplify

* working version

* add comments

* update test info

* add dropped DT

* add raw tests

* update tests

* add more tests for nested jsub

* add more tests

* make qforce_ok more robust

* update comments

* simplify logical

* remove comment since n is used

* update eval environment

* add Jans testcase

* add helper functions

* remove unused assignments

* update test nums

* escape evaluating values present int x

* update eval of vars

* add spaces

* overwrite call

* all.vars==0L and unique=FALSE

* add comment about noCall_noVars

* shorten switch

Co-authored-by: Michael Chirico &lt;chiricom@google.com&gt;

* add extra check to noCall_noVars

* rename noCall_noVars

* simplify switch

* update match.call gweighted.mean

* simplify

* rename zip and name args

* remove redundant switch() entry

* deduplicate code

* update g[_ok signature

* whitespace suggestion

* just check if 'give.names' in names

* Change check= to check_singleton=

* name argument

* update check constantish

* update NEWS item

* standardize spelling

* infix spacing

---------

Co-authored-by: Benjamin Schwendinger &lt;5801475-ben-schwen@users.noreply.gitlab.com&gt;
Co-authored-by: Michael Chirico &lt;chiricom@google.com&gt;
Co-authored-by: Michael Chirico &lt;michaelchirico4@gmail.com&gt;
diff --git a/NEWS.md b/NEWS.md
@@ -168,12 +168,12 @@
 
 28. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR.
 
-29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR.
+29. `shift()` is now optimized by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report, [#5547](https://github.com/Rdatatable/data.table/issues/5547) which was fixed before release. This helped also in improving the logic for when to turn on optimization by group in general, making it more robust.
 
     ```R
     N = 1e7
     DT = data.table(x=sample(N), y=sample(1e6,N,TRUE))
-    shift_no_opt = shift  # different name not optimised as a way to compare
+    shift_no_opt = shift  # different name not optimized as a way to compare
     microbenchmark(
       DT[, c(NA, head(x,-1)), y],
       DT[, shift_no_opt(x, 1, type="lag"), y],
@@ -263,7 +263,7 @@
     # 2:     2     4      b
     ```
 
-34. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR.
+34. `weighted.mean()` is now optimized by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR.
 
 35. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix.
 
diff --git a/R/data.table.R b/R/data.table.R
@@ -1739,41 +1739,84 @@ replace_dot_alias = function(e) {
         GForce = FALSE
       } else {
         # Apply GForce
+        # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547
+        # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list
+        # Unsafe cases: functions containing variables [c(i), abs(i)], .N
+        is_constantish = function(expr, check_singleton=FALSE) {
+          if (!is.call(expr)) {
+            return(!dotN(expr))
+          }
+          if (check_singleton) {
+            return(FALSE)
+          }
+          # calls are allowed <=> there's no SYMBOLs in the sub-AST
+          return(length(all.vars(expr, max.names=1L, unique=FALSE)) == 0L)
+        }
+        .gshift_ok = function(q) {
+          q = match.call(shift, q)
+          is_constantish(q[["n"]]) &&
+            is_constantish(q[["fill"]]) &&
+            is_constantish(q[["type"]]) &&
+            !"give.names" %chin% names(q)
+        }
+        .ghead_ok = function(q) {
+          length(q) == 3L &&
+            is_constantish(q[[3L]], check_singleton = TRUE)
+        }
+        `.g[_ok` = function(q, x) {
+          length(q) == 3L &&
+            is_constantish(q[[3L]], check_singleton = TRUE) &&
+            (q[[1L]] != "[[" || eval(call('is.atomic', q[[2L]]), envir=x)) &&
+            eval(q[[3L]], parent.frame(3L)) > 0L
+        }
+        .gweighted.mean_ok = function(q, x) { #3977
+          q = match.call(gweighted.mean, q)
+          is_constantish(q[["na.rm"]]) &&
+            (is.null(q[["w"]]) || eval(call('is.numeric', q[["w"]]), envir=x))
+        }
         .gforce_ok = function(q) {
           if (dotN(q)) return(TRUE) # For #334
           # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD
           # is.symbol() is for #1369, #1974 and #2949
-          if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE)
+          if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q[[1L]]) %chin% gfuns)) return(FALSE)
           if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE)  # 875
-          if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na")))) return(TRUE)
+          if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && is_constantish(q[[3L]]))) return(TRUE)
           #                       ^^ base::startWith errors on NULL unfortunately
-          if (length(q)>=2L && q[[1L]] == "shift") {
-            q_named = match.call(shift, q)
-            if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE)
-          }
-          if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE)  #3977
-          # otherwise there must be three arguments
-          length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
-            ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) )
+          switch(as.character(q[[1L]]), 
+            "shift" = .gshift_ok(q),
+            "weighted.mean" = .gweighted.mean_ok(q, x),
+            "tail" = , "head" = .ghead_ok(q),
+            "[[" = , "[" = `.g[_ok`(q, x),
+            FALSE
+          )
         }
         if (jsub[[1L]]=="list") {
           GForce = TRUE
           for (ii in seq.int(from=2L, length.out=length(jsub)-1L)) {
             if (!.gforce_ok(jsub[[ii]])) {GForce = FALSE; break}
           }
         } else GForce = .gforce_ok(jsub)
+        gforce_jsub = function(x, names_x) {
+          x[[1L]] = as.name(paste0("g", x[[1L]]))
+          # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok
+          # do not evaluate vars present as columns in x
+          if (length(x) >= 3L) {
+            for (i in 3:length(x)) {
+              if (is.symbol(x[[i]]) && !(x[[i]] %chin% names_x)) x[[i]] = eval(x[[i]], parent.frame(2L)) # tests 1187.2 & 1187.4
+            }
+          }
+          x
+        }
         if (GForce) {
           if (jsub[[1L]]=="list")
             for (ii in seq_along(jsub)[-1L]) {
               if (dotN(jsub[[ii]])) next; # For #334
-              jsub[[ii]][[1L]] = as.name(paste0("g", jsub[[ii]][[1L]]))
-              if (length(jsub[[ii]])>=3L && is.symbol(jsub[[ii]][[3L]]) && !(jsub[[ii]][[3L]] %chin% sdvars)) jsub[[ii]][[3L]] = eval(jsub[[ii]][[3L]], parent.frame())  # tests 1187.2 & 1187.4
+              jsub[[ii]] = gforce_jsub(jsub[[ii]], names_x)
             }
           else {
             # adding argument to ghead/gtail if none is supplied to g-optimized head/tail
             if (length(jsub) == 2L && jsub[[1L]] %chin% c("head", "tail")) jsub[["n"]] = 6L
-            jsub[[1L]] = as.name(paste0("g", jsub[[1L]]))
-            if (length(jsub)>=3L && is.symbol(jsub[[3L]]) && !(jsub[[3L]] %chin% sdvars)) jsub[[3L]] = eval(jsub[[3L]], parent.frame())   # tests 1187.3 & 1187.5
+            jsub = gforce_jsub(jsub, names_x)
           }
           if (verbose) catf("GForce optimized j to '%s'\n", deparse(jsub, width.cutoff=200L, nlines=1L))
         } else if (verbose) catf("GForce is on, left j unchanged\n");
@@ -1868,7 +1911,7 @@ replace_dot_alias = function(e) {
     if (!is.symbol(jsub)) {
       headTail_arg = function(q) {
         if (length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
-         (q1 <- q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
+         (q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
         else 0
       }
       if (jsub[[1L]] == "list"){
@@ -1882,6 +1925,11 @@ replace_dot_alias = function(e) {
       g = lapply(g, rep.int, times=grplens)
     } else if (.is_nrows(jsub)) {
       g = lapply(g, rep.int, times=len__)
+      # unpack list of lists for nrows functions
+      zip_items = function(ll) do.call(mapply, c(list(FUN = c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE))
+      if (all(vapply_1b(ans, is.list))) {
+        ans = lapply(ans, zip_items)
+      }
     }
     ans = c(g, ans)
   } else {
@@ -3000,13 +3048,13 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
 gfuns = c("[", "[[", "head", "tail", "first", "last", "sum", "mean", "prod",
           "median", "min", "max", "var", "sd", ".N", "shift", "weighted.mean") # added .N for #334
 `g[` = `g[[` = function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here.
-ghead = function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the moment
-gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment
+ghead = function(x, n) .Call(Cghead, x, as.integer(n))
+gtail = function(x, n) .Call(Cgtail, x, as.integer(n))
 gfirst = function(x) .Call(Cgfirst, x)
 glast = function(x) .Call(Cglast, x)
 gsum = function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm)
 gmean = function(x, na.rm=FALSE) .Call(Cgmean, x, na.rm)
-gweighted.mean = function(x, w, na.rm=FALSE) {
+gweighted.mean = function(x, w, ..., na.rm=FALSE) {
   if (missing(w)) gmean(x, na.rm)
   else {
     if (na.rm) { # take those indices out of the equation by setting them to 0
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -6736,6 +6736,18 @@ test(1463.69, shift(x, -6, type="cyclic"), shift(x, -1, type="cyclic"))
 # test warning
 test(1463.70, shift(x, 1, fill=1, type="cyclic"), c(5L, 1L:4L), warning="Provided argument fill=1 will be ignored since type='shift'.")
 
+# test raw #5547
+x=as.raw(1:5)
+test(1463.71, shift(x,1L),                   as.raw(c(0L, 1:4)))
+test(1463.72, shift(x,1:2),                  list(as.raw(c(0L, 1:4)), as.raw(c(0L, 0L, 1:3))))
+test(1463.73, shift(x,1L, fill=0L),               as.raw(c(0L, 1:4)))
+test(1463.74, shift(x,1L,    type="lead"),   as.raw(c(2:5, 0L)))
+test(1463.75, shift(x,1:2,   type="lead"),   list(as.raw(c(2:5, 0L)), as.raw(c(3:5, 0L, 0L))))
+test(1463.76, shift(x,1L, fill=0L,type="lead"),   as.raw(c(2:5, 0L)))
+test(1463.77, shift(x,1L,    type="cyclic"), as.raw(c(5, 1:4)))
+test(1463.78, shift(x,1:2,   type="cyclic"), list(as.raw(c(5, 1:4)), as.raw(c(4:5, 1:3))))
+test(1463.79, shift(x,-1L,   type="cyclic"), as.raw(c(2:5, 1)))
+test(1463.80, shift(x,-(1:2),type="cyclic"), list(as.raw(c(2:5, 1)), as.raw(c(3:5,1:2))))
 
 # FR #686
 DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)), foo=1:10)
@@ -13628,7 +13640,8 @@ test(1963.07, shift(DT, -1:1),
           c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L)))
 ## some coverage tests for good measure
 test(1963.08, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead')
-test(1963.09, shift(as.raw(0:1)), error = "Type 'raw' is not supported")
+test(1963.09, shift(as.raw(0:1)), as.raw(c(0,0)))
+test(1963.095, shift(list(expression(1))), error = "Type 'expression' is not supported")
 test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223
      ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
           x_shift_0 = 1:10,
@@ -17946,6 +17959,12 @@ test(2231.50, DT[, weighted.mean(x, w, na.rm=TRUE), g, verbose=TRUE], data.table
 DT = data.table(x=c(1L,NA,NaN,3L,4L,5L,5L,6L), w=c(1L,NaN,NA,1L,2L,2L,2L,2L), g=rep(1L:2L, each=4L))
 test(2231.51, DT[, weighted.mean(x, w, na.rm=FALSE), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(NA, 5)), output="GForce optimized j to")
 test(2231.52, DT[, weighted.mean(x, w, na.rm=TRUE), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2, 5)), output="GForce optimized j to")
+# let wrongly named arguments get lost in ellipsis #5543
+DT = data.table(x=c(3.7,3.3,3.5,2.8), w=c(5,5,4,1), g=1L)
+test(2231.61, DT[, weighted.mean(x, w),        g, verbose=TRUE], data.table(g=1L, V1=3.45+1/300), output="GForce optimized j to")
+test(2231.62, DT[, weighted.mean(x, weight=w), g, verbose=TRUE], data.table(g=1L, V1=3.325),      output="GForce optimized j to")
+test(2231.63, DT[, weighted.mean(x, w, na.rm=FALSE), g],    DT[, stats::weighted.mean(x, w, na.rm=FALSE), g])
+test(2231.64, DT[, weighted.mean(x, weight=w, na.rm=TRUE)], DT[, stats::weighted.mean(x, weight=w, na.rm=TRUE)])
 options(old)
 
 # cols argument for unique.data.table, #5243
@@ -18184,3 +18203,45 @@ test(2241.14, r, data.table(id=1:2, x=c(5L,2L)))
 DT = data.table(a=1, b=2, c=3)
 test(2242.1, DT[, .SD, .SDcols=2-1], DT[, .(a)])
 test(2242.2, DT[, .SD, .SDcols=length(DT)-1], DT[, .SD, .SDcols=2])
+
+# turn off GForce where arguments are calls but still allow variables, #5547
+options(datatable.optimize=2L)
+dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L))
+i = c(0,1)
+j = 1L
+t = "lead"
+f = shift
+test(2243.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE],               dt[, f(x, c(0,1), type="lead"), by=y],         output="GForce optimized j to")
+test(2243.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE],     dt[, f(x, c(0,1), type="lead"), by=y],         output="GForce optimized j to")
+test(2243.03, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE],          dt[, f(x, c(0,1), type="lead"), by=y],         output="GForce FALSE")
+test(2243.04, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y],         output="GForce FALSE")
+test(2243.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE],       dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to")
+test(2243.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE],  dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to")
+test(2243.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE],  dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE")
+test(2243.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE],            dt[, .(f(x, c(0,1), type="lead")), by=y],      output="GForce optimized j to")
+# GForce only var or call without vars as n of head/tail/"["(x, n)
+dt = data.table(id=c(1L,1L,2L), v=1:3)
+j = 1L
+test(2243.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to")
+test(2243.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to")
+test(2243.13, dt[, v[j], id,       verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to")
+# GForce only var or call without vars as na.rm of sum, mean, median, prod, min, max, var
+j = FALSE
+test(2243.21, dt[, sum(v,  na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)),        output="GForce optimized j to")
+test(2243.22, dt[, mean(v, na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)),        output="GForce optimized j to")
+test(2243.23, dt[, median(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)),        output="GForce optimized j to")
+test(2243.24, dt[, prod(v, na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)),          output="GForce optimized j to")
+test(2243.25, dt[, min(v,  na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)),        output="GForce optimized j to")
+test(2243.26, dt[, max(v,  na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)),        output="GForce optimized j to")
+test(2243.27, dt[, var(v,  na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)),       output="GForce optimized j to")
+test(2243.28, dt[, sd(v,   na.rm=j),   id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to")
+dt = data.table(g=1:2, y=1:4)
+j = TRUE
+test(2243.31, dt[, sum(y,  na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)),        output="GForce FALSE")
+test(2243.32, dt[, mean(y, na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)),          output="GForce FALSE")
+test(2243.33, dt[, median(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)),          output="GForce FALSE")
+test(2243.34, dt[, prod(y, na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)),          output="GForce FALSE")
+test(2243.35, dt[, min(y,  na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)),        output="GForce FALSE")
+test(2243.36, dt[, max(y,  na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)),        output="GForce FALSE")
+test(2243.37, dt[, var(y,  na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)),          output="GForce FALSE")
+test(2243.38, dt[, sd(y,   na.rm=as.logical(j)),   g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE")
diff --git a/src/shift.c b/src/shift.c
@@ -42,11 +42,11 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type)
     R_xlen_t xrows = xlength(elem);
     SEXP thisfill = PROTECT(coerceAs(fill, elem, ScalarLogical(0))); nprotect++;  // #4865 use coerceAs for type coercion
     switch (TYPEOF(elem)) {
-    case INTSXP : {
+    case INTSXP: case LGLSXP: {
       const int ifill = INTEGER(thisfill)[0];
       for (int j=0; j<nk; j++) {
         SEXP tmp;
-        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) );
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(TYPEOF(elem), xrows) );
         const int *restrict ielem = INTEGER(elem);
         int *restrict itmp = INTEGER(tmp);
         size_t thisk = cycle ? abs(kd[j]) % xrows : MIN(abs(kd[j]), xrows);
@@ -114,29 +114,6 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type)
         copyMostAttrib(elem, tmp);
       }
     } break;
-    case LGLSXP : {
-      const int lfill = LOGICAL(thisfill)[0];
-      for (int j=0; j<nk; j++) {
-        SEXP tmp;
-        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) );
-        const int *restrict lelem = LOGICAL(elem);
-        int *restrict ltmp = LOGICAL(tmp);
-        size_t thisk = cycle ? abs(kd[j]) % xrows : MIN(abs(kd[j]), xrows);
-        size_t tailk = xrows-thisk;
-        if (((stype == LAG || stype == CYCLIC) && kd[j] >= 0) || (stype == LEAD && kd[j] < 0)) {
-          if (tailk > 0) memmove(ltmp+thisk, lelem, tailk*size);
-          if (cycle) {
-            if (thisk > 0) memmove(ltmp, lelem+tailk, thisk*size);
-          } else for (int m=0; m<thisk; m++) ltmp[m] = cycle ? lelem[m+tailk] : lfill;
-        } else {
-          if (tailk > 0) memmove(ltmp, lelem+thisk, tailk*size);
-          if (cycle) {
-            if (thisk > 0) memmove(ltmp+tailk, lelem, thisk*size);
-          } else for (int m=tailk; m<xrows; m++) ltmp[m] = cycle ? lelem[m-tailk] : lfill;
-        }
-        copyMostAttrib(elem, tmp);
-      }
-    } break;
     case STRSXP : {
       const SEXP sfill = STRING_ELT(thisfill, 0);
       for (int j=0; j<nk; j++) {
@@ -167,6 +144,29 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type)
         copyMostAttrib(elem, tmp);
       }
     } break;
+    case RAWSXP : {
+      const Rbyte rfill = RAW(thisfill)[0];
+      for (int j=0; j<nk; j++) {
+        SEXP tmp;
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(RAWSXP, xrows) );
+        const Rbyte *restrict delem = RAW(elem);
+        Rbyte *restrict dtmp = RAW(tmp);
+        size_t thisk = cycle ? abs(kd[j]) % xrows : MIN(abs(kd[j]), xrows);
+        size_t tailk = xrows-thisk;
+        if (((stype == LAG || stype == CYCLIC) && kd[j] >= 0) || (stype == LEAD && kd[j] < 0)) {
+          if (tailk > 0) memmove(dtmp+thisk, delem, tailk*size);
+          if (cycle) {
+            if (thisk > 0) memmove(dtmp, delem+tailk, thisk*size);
+          } else for (int m=0; m<thisk; m++) dtmp[m] = rfill;
+        } else {
+          if (tailk > 0) memmove(dtmp, delem+thisk, tailk*size);
+          if (cycle) {
+            if (thisk > 0) memmove(dtmp+tailk, delem, thisk*size);
+          } else for (int m=tailk; m<xrows; m++) dtmp[m] = rfill;
+        }
+        copyMostAttrib(elem, tmp);
+      }
+    } break;
     default :
       error(_("Type '%s' is not supported"), type2char(TYPEOF(elem)));
     }