Skip to content

Commit 8280e2d

Browse files
use integer indices for measure.vars and error on non scalar aggregates
1 parent c27ec26 commit 8280e2d

File tree

3 files changed

+70
-52
lines changed

3 files changed

+70
-52
lines changed

R/fcast.R

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
186186
maybe_err = function(list.of.columns) {
187187
if (!all(lengths(list.of.columns) == 1L)) {
188188
msg = gettext("Aggregating functions should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned.")
189-
if (is.null(fill)) { # TODO change to always stopf #6329
190-
stop(msg, domain=NA, call. = FALSE)
191-
} else {
192-
warning(msg, domain=NA, call. = FALSE)
193-
}
189+
stop(msg, domain=NA, call. = FALSE)
194190
}
195191
list.of.columns
196192
}

inst/tests/tests.Rraw

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17417,13 +17417,13 @@ exid = data.table(id=1, expected)
1741717417
test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid)
1741817418
test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid)
1741917419
test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2))
17420-
test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2)), warning="measure.vars is a list with length=1") # measure.vars named list length=1, #5065
17421-
# consistency between measure.vars=list with length=1 and length>1, #5209
17422-
test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2), warning="measure.vars is a list with length=1")
17420+
test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(1:2), b=c(1,2))) # list yields indices
17421+
# consistency between measure.vars=list with length=1 and length>1 now uses indices for list case, #5209
17422+
test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor(1L), value=2))
1742317423
test(2182.72, melt(DT.wide, measure.vars=c("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2))
17424-
test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="a2", value=2), warning="measure.vars is a list with length=1")
17424+
test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable=1L, value=2))
1742517425
test(2182.74, melt(DT.wide, measure.vars=c("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="a2", value=2))
17426-
test(2182.75, melt(data.table(a=10, b=20), measure.vars=list(n="a"), variable.factor=FALSE), data.table(b=20, variable="a", n=10), warning="measure.vars is a list with length=1")#thanks @mnazarov
17426+
test(2182.75, melt(data.table(a=10, b=20), measure.vars=list(n="a"), variable.factor=FALSE), data.table(b=20, variable=1L, n=10))#thanks @mnazarov
1742717427

1742817428
### First block testing measurev
1742917429
# new variable_table attribute for measure.vars, PR#4731 for multiple issues

src/fmelt.c

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -598,62 +598,84 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str
598598
if (data->lvalues==1 && length(VECTOR_ELT(data->valuecols, 0)) != data->lmax)
599599
internal_error(__func__, "getvarcols %d %d", length(VECTOR_ELT(data->valuecols, 0)), data->lmax); // # nocov
600600
if (isNull(data->variable_table)) {
601-
if ((data->lvalues == 1) & data->measure_is_list) {
602-
warning(_("measure.vars is a list with length=1, which as long documented should return integer indices in the 'variable' column, but currently returns character column names. To increase consistency in the next release, we plan to change 'variable' to integer, so users who were relying on this behavior should change measure.vars=list('col_name') (output variable is column name now, but will become column index/integer) to measure.vars='col_name' (variable is column name before and after the planned change)."));
603-
}
604601
if (!varfactor) {
605-
SET_VECTOR_ELT(ansvars, 0, target=allocVector(STRSXP, data->totlen));
606-
if (data->lvalues == 1) {//one value column to output. TODO #5247 change to !data->measure_is_list
607-
const int *thisvaluecols = INTEGER(VECTOR_ELT(data->valuecols, 0));
602+
if (data->measure_is_list) {
603+
// Return integer indices for list measure.vars (consistency with docs)
604+
SET_VECTOR_ELT(ansvars, 0, target=allocVector(INTSXP, data->totlen));
605+
int *td = INTEGER(target);
608606
for (int j=0, ansloc=0; j<data->lmax; ++j) {
609607
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
610-
SEXP str = STRING_ELT(dtnames, thisvaluecols[j]-1);
611-
for (int k=0; k<thislen; ++k) SET_STRING_ELT(target, ansloc++, str);
608+
for (int k=0; k<thislen; ++k) td[ansloc++] = j+1;
612609
}
613-
} else {//multiple value columns to output.
614-
for (int j=0, ansloc=0, level=1; j<data->lmax; ++j) {
615-
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
616-
char buff[20];
617-
snprintf(buff, sizeof(buff), "%d", level++); // # notranslate
618-
for (int k=0; k<thislen; ++k) SET_STRING_ELT(target, ansloc++, mkChar(buff));
610+
} else {
611+
// same behavior for vector measure.vars: variable is column names
612+
SET_VECTOR_ELT(ansvars, 0, target=allocVector(STRSXP, data->totlen));
613+
if (data->lvalues == 1) {
614+
const int *thisvaluecols = INTEGER(VECTOR_ELT(data->valuecols, 0));
615+
for (int j=0, ansloc=0; j<data->lmax; ++j) {
616+
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
617+
SEXP str = STRING_ELT(dtnames, thisvaluecols[j]-1);
618+
for (int k=0; k<thislen; ++k) SET_STRING_ELT(target, ansloc++, str);
619+
}
620+
} else {//multiple value columns to output.
621+
for (int j=0, ansloc=0, level=1; j<data->lmax; ++j) {
622+
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
623+
char buff[20];
624+
snprintf(buff, sizeof(buff), "%d", level++); // # notranslate
625+
for (int k=0; k<thislen; ++k) SET_STRING_ELT(target, ansloc++, mkChar(buff));
626+
}
619627
}
620628
}
621629
} else {// varfactor==TRUE
622630
SET_VECTOR_ELT(ansvars, 0, target=allocVector(INTSXP, data->totlen));
623631
SEXP levels;
624632
int *td = INTEGER(target);
625-
if (data->lvalues == 1) {//one value column to output. TODO #5247 change to !data->measure_is_list
626-
SEXP thisvaluecols = VECTOR_ELT(data->valuecols, 0);
627-
int len = length(thisvaluecols);
628-
levels = PROTECT(allocVector(STRSXP, len)); protecti++;
629-
const int *vd = INTEGER(thisvaluecols);
630-
for (int j=0; j<len; ++j) SET_STRING_ELT(levels, j, STRING_ELT(dtnames, vd[j]-1));
631-
SEXP m = PROTECT(chmatch(levels, levels, 0)); protecti++; // do we have any dups?
632-
int numRemove = 0; // remove dups and any for which narm and all-NA
633-
int *md = INTEGER(m);
634-
for (int j=0; j<len; ++j) {
635-
if (md[j]!=j+1 /*dup*/ || (data->narm && length(VECTOR_ELT(data->not_NA_indices, j))==0)) { numRemove++; md[j]=0; }
636-
}
637-
if (numRemove) {
638-
SEXP newlevels = PROTECT(allocVector(STRSXP, len-numRemove)); protecti++;
639-
for (int i=0, loc=0; i<len; ++i) if (md[i]!=0) { SET_STRING_ELT(newlevels, loc++, STRING_ELT(levels, i)); }
640-
m = PROTECT(chmatch(levels, newlevels, 0)); protecti++; // budge up the gaps
641-
md = INTEGER(m);
642-
levels = newlevels;
633+
if (data->measure_is_list) {
634+
int nlevel = data->lmax;
635+
levels = PROTECT(allocVector(STRSXP, nlevel)); protecti++;
636+
for (int j=0; j<nlevel; ++j) {
637+
char buff[20];
638+
snprintf(buff, sizeof(buff), "%d", j+1); // # notranslate
639+
SET_STRING_ELT(levels, j, mkChar(buff));
643640
}
644641
for (int j=0, ansloc=0; j<data->lmax; ++j) {
645642
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
646-
for (int k=0; k<thislen; ++k) td[ansloc++] = md[j];
643+
for (int k=0; k<thislen; ++k) td[ansloc++] = j+1;
647644
}
648-
} else {//multiple output columns.
649-
int nlevel=0;
650-
levels = PROTECT(allocVector(STRSXP, data->lmax)); protecti++;
651-
for (int j=0, ansloc=0; j<data->lmax; ++j) {
652-
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
653-
char buff[20];
654-
snprintf(buff, sizeof(buff), "%d", nlevel + 1); // # notranslate
655-
SET_STRING_ELT(levels, nlevel++, mkChar(buff)); // generate levels = 1:nlevels
656-
for (int k=0; k<thislen; ++k) td[ansloc++] = nlevel;
645+
} else { // non-list measure.vars keeps legacy name-based levels
646+
if (data->lvalues == 1) {
647+
SEXP thisvaluecols = VECTOR_ELT(data->valuecols, 0);
648+
int len = length(thisvaluecols);
649+
levels = PROTECT(allocVector(STRSXP, len)); protecti++;
650+
const int *vd = INTEGER(thisvaluecols);
651+
for (int j=0; j<len; ++j) SET_STRING_ELT(levels, j, STRING_ELT(dtnames, vd[j]-1));
652+
SEXP m = PROTECT(chmatch(levels, levels, 0)); protecti++; // do we have any dups?
653+
int numRemove = 0; // remove dups and any for which narm and all-NA
654+
int *md = INTEGER(m);
655+
for (int j=0; j<len; ++j) {
656+
if (md[j]!=j+1 /*dup*/ || (data->narm && length(VECTOR_ELT(data->not_NA_indices, j))==0)) { numRemove++; md[j]=0; }
657+
}
658+
if (numRemove) {
659+
SEXP newlevels = PROTECT(allocVector(STRSXP, len-numRemove)); protecti++;
660+
for (int i=0, loc=0; i<len; ++i) if (md[i]!=0) { SET_STRING_ELT(newlevels, loc++, STRING_ELT(levels, i)); }
661+
m = PROTECT(chmatch(levels, newlevels, 0)); protecti++; // budge up the gaps
662+
md = INTEGER(m);
663+
levels = newlevels;
664+
}
665+
for (int j=0, ansloc=0; j<data->lmax; ++j) {
666+
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
667+
for (int k=0; k<thislen; ++k) td[ansloc++] = md[j];
668+
}
669+
} else {//multiple output columns.
670+
int nlevel=0;
671+
levels = PROTECT(allocVector(STRSXP, data->lmax)); protecti++;
672+
for (int j=0, ansloc=0; j<data->lmax; ++j) {
673+
const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow;
674+
char buff[20];
675+
snprintf(buff, sizeof(buff), "%d", nlevel + 1); // # notranslate
676+
SET_STRING_ELT(levels, nlevel++, mkChar(buff)); // generate levels = 1:nlevels
677+
for (int k=0; k<thislen; ++k) td[ansloc++] = nlevel;
678+
}
657679
}
658680
}
659681
setAttrib(target, R_LevelsSymbol, levels);

0 commit comments

Comments
 (0)