Skip to content

Commit 821c8f9

Browse files
Transpose(dt) allows to return list without promoting elements to maxtype (#5805)
* add feature * change fill * undup code * update arguments * add man * add tests * update usage docs * add coverage * add factors test * update tests for factors * add NEWS * update news * add example to docs * update docs * Update NEWS.md Co-authored-by: Michael Chirico <[email protected]> * remove extra blank line * ease t/f error * rm blank line * restore logical case * reordering test case numbers * fix LGL case * use unlist as proper action * move NEWS * fix doc * rm blank line in tests --------- Co-authored-by: Michael Chirico <[email protected]>
1 parent 8de09b2 commit 821c8f9

File tree

6 files changed

+42
-10
lines changed

6 files changed

+42
-10
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
4. Namespace-qualifying `data.table::shift()`, `data.table::first()`, or `data.table::last()` will not deactivate GForce, [#5942](https://github.com/Rdatatable/data.table/issues/5942). Thanks @MichaelChirico for the proposal and fix. Namespace-qualifying other calls like `stats::sum()`, `base::prod()`, etc., continue to work as an escape valve to avoid GForce, e.g. to ensure S3 method dispatch.
2222

23+
5. `transpose` gains `list.cols=` argument, [#5639](https://github.com/Rdatatable/data.table/issues/5639). Use this to return output with list columns and avoid type promotion (an exception is `factor` columns, which are promoted to `character` for consistency between `list.cols=TRUE` and `list.cols=FALSE`). This is convenient for creating a row-major representation of a table. Thanks to @MLopez-Ibanez for the request, and Benjamin Schwendinger for the PR.
24+
2325
## BUG FIXES
2426

2527
1. `unique()` returns a copy in the case when `nrow(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.

R/transpose.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) {
1+
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE) {
22
if (!is.null(make.names)) {
33
stopifnot(length(make.names)==1L)
44
if (is.character(make.names)) {
@@ -14,7 +14,7 @@ transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names
1414
colnames = as.character(l[[make.names]])
1515
l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names]
1616
}
17-
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names)
17+
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names, list.cols)
1818
if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames))
1919
else if (is.data.frame(l)) # including data.table but not plain list
2020
setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names)))))

inst/tests/tests.Rraw

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6921,10 +6921,22 @@ ll = sapply(ll, paste, collapse=",")
69216921
test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed=TRUE))
69226922
test(1477.08, transpose(1:5), error="l must be a list")
69236923
test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
6924-
test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is")
6924+
test(1477.10, transpose(list(x~y)), error="Item 1 of list input is")
69256925
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector")
69266926
test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE")
69276927
test(1477.13, transpose(list()), list())
6928+
# return list columns #5639
6929+
la = list(as.list(1:3), list("a","b","c"))
6930+
lb = list(list(1L,"a"), list(2L,"b"), list(3L,"c"))
6931+
test(1477.14, transpose(list(1:3, c("a","b","c")), list.cols=TRUE), lb)
6932+
test(1477.15, transpose(list(1:3, c("a","b","c")), list.cols=FALSE), lapply(lb, unlist))
6933+
test(1477.16, transpose(la, list.cols=TRUE), lb)
6934+
test(1477.17, transpose(lb, list.cols=TRUE), la)
6935+
test(1477.18, transpose(list(list(1L,"a"), list(2L), list(3L,"c")), list.cols=TRUE, fill="b"), la)
6936+
test(1477.19, transpose(list(1:2, c("a","b","c")), list.cols=TRUE, fill=3L), lb)
6937+
test(1477.20, transpose(list(factor(letters[1:3])), list.cols=TRUE), list(list("a"), list("b"), list("c")))
6938+
test(1477.21, transpose(list(factor(letters[1:3])), list.cols=FALSE), list("a", "b", "c"))
6939+
test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TRUE/FALSE.")
69286940

69296941
# #480 `setDT` and 'lapply'
69306942
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())

man/transpose.Rd

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
}
77

88
\usage{
9-
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL)
9+
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE)
1010
}
1111
\arguments{
1212
\item{l}{ A list, data.frame or data.table. }
1313
\item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so as to return each element of the transposed result of equal lengths. }
1414
\item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.}
1515
\item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.}
1616
\item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.}
17+
\item{list.cols}{Default is \code{FALSE}. \code{TRUE} will avoid promoting types and return columns of type \code{list} instead. \code{factor} will always be cast to \code{character}.}
1718
}
1819
\details{
1920
The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result.
@@ -38,6 +39,14 @@ setDT(transpose(ll, fill=0))[]
3839
DT = data.table(x=1:5, y=6:10)
3940
transpose(DT)
4041

42+
DT = data.table(x=1:3, y=c("a","b","c"))
43+
transpose(DT, list.cols=TRUE)
44+
45+
# base R equivalent of transpose
46+
l = list(1:3, c("a", "b", "c"))
47+
lapply(seq(length(l[[1]])), function(x) lapply(l, `[[`, x))
48+
transpose(l, list.cols=TRUE)
49+
4150
ll = list(nm=c('x', 'y'), 1:2, 3:4)
4251
transpose(ll, make.names="nm")
4352
}

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ SEXP lookup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
306306
SEXP overlaps(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
307307
SEXP whichwrapper(SEXP, SEXP);
308308
SEXP shift(SEXP, SEXP, SEXP, SEXP);
309-
SEXP transpose(SEXP, SEXP, SEXP, SEXP);
309+
SEXP transpose(SEXP, SEXP, SEXP, SEXP, SEXP);
310310
SEXP anyNA(SEXP, SEXP);
311311
SEXP setlevels(SEXP, SEXP, SEXP);
312312
SEXP rleid(SEXP, SEXP);

src/transpose.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <Rdefines.h>
33
#include <time.h>
44

5-
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
5+
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg, SEXP listColsArg) {
66

77
int nprotect=0;
88
if (!isNewList(l))
@@ -18,23 +18,26 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
1818
if (length(fill) != 1)
1919
error(_("fill must be a length 1 vector, such as the default NA"));
2020
R_len_t ln = LENGTH(l);
21+
if (!IS_TRUE_OR_FALSE(listColsArg))
22+
error(_("list.cols should be logical TRUE/FALSE."));
23+
bool listCol = LOGICAL(listColsArg)[0];
2124

2225
// preprocessing
2326
int maxlen=0, zerolen=0;
2427
SEXPTYPE maxtype=0;
2528
for (int i=0; i<ln; ++i) {
2629
SEXP li = VECTOR_ELT(l, i);
27-
if (!isVectorAtomic(li) && !isNull(li))
28-
error(_("Item %d of list input is not an atomic vector"), i+1);
30+
if (!isVectorAtomic(li) && !isNull(li) && !isNewList(li))
31+
error(_("Item %d of list input is not either an atomic vector, or a list"), i+1);
2932
const int len = length(li);
3033
if (len>maxlen) maxlen=len;
3134
zerolen += (len==0);
3235
SEXPTYPE type = TYPEOF(li);
3336
if (isFactor(li)) type=STRSXP;
3437
if (type>maxtype) maxtype=type;
3538
}
39+
if (listCol) maxtype=VECSXP; // need to keep preprocessing for zerolen
3640
fill = PROTECT(coerceVector(fill, maxtype)); nprotect++;
37-
3841
SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++;
3942
int anslen = (ignore) ? (ln - zerolen) : ln;
4043
if (rn) {
@@ -54,7 +57,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
5457
const int len = length(li);
5558
if (ignore && len==0) continue;
5659
if (TYPEOF(li) != maxtype) {
57-
li = PROTECT(isFactor(li) ? asCharacterFactor(li) : coerceVector(li, maxtype));
60+
li = PROTECT(isFactor(li) ? (listCol ? coerceVector(asCharacterFactor(li), VECSXP) : asCharacterFactor(li)) : coerceVector(li, maxtype));
5861
} else PROTECT(li); // extra PROTECT just to help rchk by avoiding two counter variables
5962
switch (maxtype) {
6063
case LGLSXP : {
@@ -84,6 +87,12 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
8487
SET_STRING_ELT(ansp[j+rn], k, j<len ? STRING_ELT(li, j) : sfill);
8588
}
8689
} break;
90+
case VECSXP : {
91+
const SEXP vfill = VECTOR_ELT(fill, 0);
92+
for (int j=0; j<maxlen; ++j) {
93+
SET_VECTOR_ELT(ansp[j+rn], k, j<len ? VECTOR_ELT(li, j) : vfill);
94+
}
95+
} break;
8796
default :
8897
error(_("Unsupported column type '%s'"), type2char(maxtype));
8998
}

0 commit comments

Comments
 (0)