Skip to content

Commit 08d6414

Browse files
committed
return number of non-NA by group as attribute
1 parent 29c6825 commit 08d6414

File tree

3 files changed

+51
-15
lines changed

3 files changed

+51
-15
lines changed

src/data.table.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ extern SEXP char_POSIXct;
8080
extern SEXP char_POSIXt;
8181
extern SEXP char_UTC;
8282
extern SEXP char_nanotime;
83-
extern SEXP char_lens;
8483
extern SEXP char_indices;
8584
extern SEXP char_allLen1;
8685
extern SEXP char_allGrp1;
@@ -95,6 +94,8 @@ extern SEXP sym_index;
9594
extern SEXP sym_BY;
9695
extern SEXP sym_starts, char_starts;
9796
extern SEXP sym_maxgrpn;
97+
extern SEXP sym_lens, char_lens;
98+
extern SEXP sym_first;
9899
extern SEXP sym_colClassesAs;
99100
extern SEXP sym_verbose;
100101
extern SEXP SelfRefSymbol;

src/gsumm.c

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,13 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
202202
if (isVectorAtomic(ans)) {
203203
SEXP tt = PROTECT(allocVector(VECSXP, 1));
204204
SET_VECTOR_ELT(tt, 0, ans);
205-
UNPROTECT(2);
206-
return tt;
205+
UNPROTECT(1);
206+
ans = tt;
207+
}
208+
for (int i=0; i<LENGTH(ans); ++i) {
209+
SEXP tt = VECTOR_ELT(ans, i);
210+
setAttrib(tt, sym_lens, R_NilValue);
211+
setAttrib(tt, sym_first, R_NilValue);
207212
}
208213
UNPROTECT(1);
209214
return ans;
@@ -930,9 +935,18 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
930935
}
931936
}
932937
SEXP ans = PROTECT(allocVector(TYPEOF(x), anslen));
938+
int *anslens = NULL;
939+
if (narm) {
940+
// how many non-NA were found for each group
941+
SEXP v;
942+
setAttrib(ans, sym_lens, v=allocVector(INTSXP, ngrp));
943+
setAttrib(ans, sym_first, ScalarLogical(first)); // so gforce knows which end to pad if necessary
944+
anslens = INTEGER(v);
945+
}
933946
int ansi = 0;
934947
#define DO(CTYPE, RTYPE, RNA, ASSIGN) { \
935948
const CTYPE *xd = (const CTYPE *)RTYPE(x); \
949+
CTYPE *ansd = (CTYPE *)RTYPE(ans); \
936950
if (w==1 || !nthvalue) { \
937951
const int inc = first ? +1 : -1; \
938952
for (int g=0; g<ngrp; ++g) { \
@@ -951,16 +965,32 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
951965
/* const-bool narm and short-circuit-&& means above if() insignificant */ \
952966
ASSIGN; write++; \
953967
} \
954-
const CTYPE val = RNA; \
968+
const CTYPE val = RNA; /* TODO remove these 2 lines */ \
955969
while (write<thisn) { ASSIGN; write++; } /* when not enough non-NA pad with NA */ \
956-
if (!first) ansi+=thisn+1; \
970+
if (false /*put back narm*/) { \
971+
anslens[g] = write; \
972+
if (write<thisn && !first) { \
973+
/* fewer than MIN(w,grpn) non-NA are present which we wrote (working backwards) to */ \
974+
/* the end of the allocation for the the group; budge them up to close the gap */ \
975+
int gap = thisn-write; \
976+
memmove(ansd+ansi-gap+1, ansd+ansi+1, gap*sizeof(CTYPE)); \
977+
/* the memmove was behind the write-barrier so careful to now blank off the STRSXP */ \
978+
/* and VECSXP pointers at the end of the group otherwise the next group will assign */ \
979+
/* using the write-barrier and incorrectly decremement those as old values */ \
980+
memset(ansd+ansi+write-gap+1, 0, gap*sizeof(CTYPE)); \
981+
ansi -= gap; \
982+
} \
983+
/* else when first there's nothing more to do for this group as we'll write the next */ \
984+
/* group's non-NA values in the next row */ \
985+
} \
986+
if (!first) ansi+=write+1; /* we wrote backwards so pass over what we wrote */ \
957987
} \
958-
} else if (first) { \
988+
} else if (first) { \
959989
/* gnthvalue */ \
960990
const int inc=1; \
961991
for (int g=0; g<ngrp; ++g) { \
962992
const int grpn = grpsize[g]; \
963-
if (w>grpn || w==0) { const CTYPE val=RNA; ASSIGN; continue; } \
993+
if (w>grpn || w==0) { const CTYPE val=RNA; ASSIGN; continue; } \
964994
const int j = ff[g]-1+w-1; \
965995
const int k = issorted ? j : oo[j]-1; \
966996
const CTYPE val = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? RNA : xd[irows[k]-1]); \
@@ -977,35 +1007,37 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
9771007
case LGLSXP: {
9781008
#undef ISNAT
9791009
#define ISNAT(x) ((x)==NA_INTEGER)
980-
int *ansd=LOGICAL(ans); DO(int, LOGICAL, NA_LOGICAL, ansd[ansi]=val; ansi+=inc)
1010+
DO(int, LOGICAL, NA_LOGICAL, ansd[ansi]=val; ansi+=inc)
9811011
} break;
9821012
case INTSXP: {
9831013
#undef ISNAT
9841014
#define ISNAT(x) ((x)==NA_INTEGER)
985-
int *ansd=INTEGER(ans); DO(int, INTEGER, NA_INTEGER, ansd[ansi]=val; ansi+=inc)
1015+
DO(int, INTEGER, NA_INTEGER, ansd[ansi]=val; ansi+=inc)
9861016
} break;
9871017
case REALSXP: if (INHERITS(x, char_integer64)) {
9881018
#undef ISNAT
9891019
#define ISNAT(x) ((x)==NA_INTEGER64)
990-
int64_t *ansd=(int64_t *)REAL(ans); DO(int64_t, REAL, NA_INTEGER64, ansd[ansi]=val; ansi+=inc)
1020+
DO(int64_t, REAL, NA_INTEGER64, ansd[ansi]=val; ansi+=inc)
9911021
} else {
9921022
#undef ISNAT
9931023
#define ISNAT(x) (ISNAN(x))
994-
double *ansd=REAL(ans); DO(double, REAL, NA_REAL, ansd[ansi]=val; ansi+=inc)
1024+
DO(double, REAL, NA_REAL, ansd[ansi]=val; ansi+=inc)
9951025
} break;
9961026
case CPLXSXP: {
9971027
#undef ISNAT
9981028
#define ISNAT(x) (ISNAN_COMPLEX(x))
999-
Rcomplex *ansd=COMPLEX(ans); DO(Rcomplex, COMPLEX, NA_CPLX, ansd[ansi]=val; ansi+=inc)
1029+
DO(Rcomplex, COMPLEX, NA_CPLX, ansd[ansi]=val; ansi+=inc)
10001030
} break;
10011031
case STRSXP: {
10021032
#undef ISNAT
10031033
#define ISNAT(x) ((x)==NA_STRING)
1004-
DO(SEXP, STRING_PTR, NA_STRING, SET_STRING_ELT(ans,ansi,val); ansi+=inc) } break;
1034+
DO(SEXP, STRING_PTR, NA_STRING, SET_STRING_ELT(ans,ansi,val); ansi+=inc)
1035+
} break;
10051036
case VECSXP: {
10061037
#undef ISNAT
10071038
#define ISNAT(x) (isNull(x) || (isLogical(x) && LENGTH(x)==1 && LOGICAL(x)[0]==NA_LOGICAL))
1008-
DO(SEXP, SEXPPTR_RO, ScalarLogical(NA_LOGICAL), SET_VECTOR_ELT(ans,ansi,val); ansi+=inc) } break;
1039+
DO(SEXP, SEXPPTR_RO, ScalarLogical(NA_LOGICAL), SET_VECTOR_ELT(ans,ansi,val); ansi+=inc) /* global replace ScalarLogical() with fixed constant R_FalseValue somehow */
1040+
} break;
10091041
default:
10101042
error(_("Type '%s' is not supported by GForce head/tail/first/last/`[`. Either add the namespace prefix (e.g. utils::head(.)) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
10111043
}

src/init.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ SEXP sym_index;
2828
SEXP sym_BY;
2929
SEXP sym_starts, char_starts;
3030
SEXP sym_maxgrpn;
31+
SEXP sym_lens;
32+
SEXP sym_first;
3133
SEXP sym_colClassesAs;
3234
SEXP sym_verbose;
3335
SEXP SelfRefSymbol;
@@ -334,7 +336,7 @@ void attribute_visible R_init_data_table(DllInfo *info)
334336
char_UTC = PRINTNAME(install("UTC"));
335337
char_nanotime = PRINTNAME(install("nanotime"));
336338
char_starts = PRINTNAME(sym_starts = install("starts"));
337-
char_lens = PRINTNAME(install("lens"));
339+
char_lens = PRINTNAME(sym_lens = install("lens"));
338340
char_indices = PRINTNAME(install("indices"));
339341
char_allLen1 = PRINTNAME(install("allLen1"));
340342
char_allGrp1 = PRINTNAME(install("allGrp1"));
@@ -361,6 +363,7 @@ void attribute_visible R_init_data_table(DllInfo *info)
361363
sym_index = install("index");
362364
sym_BY = install(".BY");
363365
sym_maxgrpn = install("maxgrpn");
366+
sym_first = install("first");
364367
sym_colClassesAs = install("colClassesAs");
365368
sym_verbose = install("datatable.verbose");
366369
SelfRefSymbol = install(".internal.selfref");

0 commit comments

Comments
 (0)