@@ -202,8 +202,13 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
202202 if (isVectorAtomic (ans )) {
203203 SEXP tt = PROTECT (allocVector (VECSXP , 1 ));
204204 SET_VECTOR_ELT (tt , 0 , ans );
205- UNPROTECT (2 );
206- return tt ;
205+ UNPROTECT (1 );
206+ ans = tt ;
207+ }
208+ for (int i = 0 ; i < LENGTH (ans ); ++ i ) {
209+ SEXP tt = VECTOR_ELT (ans , i );
210+ setAttrib (tt , sym_lens , R_NilValue );
211+ setAttrib (tt , sym_first , R_NilValue );
207212 }
208213 UNPROTECT (1 );
209214 return ans ;
@@ -930,9 +935,18 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
930935 }
931936 }
932937 SEXP ans = PROTECT (allocVector (TYPEOF (x ), anslen ));
938+ int * anslens = NULL ;
939+ if (narm ) {
940+ // how many non-NA were found for each group
941+ SEXP v ;
942+ setAttrib (ans , sym_lens , v = allocVector (INTSXP , ngrp ));
943+ setAttrib (ans , sym_first , ScalarLogical (first )); // so gforce knows which end to pad if necessary
944+ anslens = INTEGER (v );
945+ }
933946 int ansi = 0 ;
934947 #define DO (CTYPE , RTYPE , RNA , ASSIGN ) { \
935948 const CTYPE *xd = (const CTYPE *)RTYPE(x); \
949+ CTYPE *ansd = (CTYPE *)RTYPE(ans); \
936950 if (w==1 || !nthvalue) { \
937951 const int inc = first ? +1 : -1; \
938952 for (int g=0; g<ngrp; ++g) { \
@@ -951,16 +965,32 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
951965 /* const-bool narm and short-circuit-&& means above if() insignificant */ \
952966 ASSIGN ; write ++ ; \
953967 } \
954- const CTYPE val = RNA ; \
968+ const CTYPE val = RNA ; /* TODO remove these 2 lines */ \
955969 while (write < thisn ) { ASSIGN ; write ++ ; } /* when not enough non-NA pad with NA */ \
956- if (!first ) ansi += thisn + 1 ; \
970+ if (false /*put back narm*/ ) { \
971+ anslens [g ] = write ; \
972+ if (write < thisn && !first ) { \
973+ /* fewer than MIN(w,grpn) non-NA are present which we wrote (working backwards) to */ \
974+ /* the end of the allocation for the the group; budge them up to close the gap */ \
975+ int gap = thisn - write ; \
976+ memmove (ansd + ansi - gap + 1 , ansd + ansi + 1 , gap * sizeof (CTYPE )); \
977+ /* the memmove was behind the write-barrier so careful to now blank off the STRSXP */ \
978+ /* and VECSXP pointers at the end of the group otherwise the next group will assign */ \
979+ /* using the write-barrier and incorrectly decremement those as old values */ \
980+ memset (ansd + ansi + write - gap + 1 , 0 , gap * sizeof (CTYPE )); \
981+ ansi -= gap ; \
982+ } \
983+ /* else when first there's nothing more to do for this group as we'll write the next */ \
984+ /* group's non-NA values in the next row */ \
985+ } \
986+ if (!first ) ansi += write + 1 ; /* we wrote backwards so pass over what we wrote */ \
957987 } \
958- } else if (first ) { \
988+ } else if (first ) { \
959989 /* gnthvalue */ \
960990 const int inc = 1 ; \
961991 for (int g = 0 ; g < ngrp ; ++ g ) { \
962992 const int grpn = grpsize [g ]; \
963- if (w > grpn || w == 0 ) { const CTYPE val = RNA ; ASSIGN ; continue ; } \
993+ if (w > grpn || w == 0 ) { const CTYPE val = RNA ; ASSIGN ; continue ; } \
964994 const int j = ff [g ]- 1 + w - 1 ; \
965995 const int k = issorted ? j : oo [j ]- 1 ; \
966996 const CTYPE val = nosubset ? xd [k ] : (irows [k ]== NA_INTEGER ? RNA : xd [irows [k ]- 1 ]); \
@@ -977,35 +1007,37 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
9771007 case LGLSXP : {
9781008 #undef ISNAT
9791009 #define ISNAT (x ) ((x)==NA_INTEGER)
980- int * ansd = LOGICAL ( ans ); DO (int , LOGICAL , NA_LOGICAL , ansd [ansi ]= val ; ansi += inc )
1010+ DO (int , LOGICAL , NA_LOGICAL , ansd [ansi ]= val ; ansi += inc )
9811011 } break ;
9821012 case INTSXP : {
9831013 #undef ISNAT
9841014 #define ISNAT (x ) ((x)==NA_INTEGER)
985- int * ansd = INTEGER ( ans ); DO (int , INTEGER , NA_INTEGER , ansd [ansi ]= val ; ansi += inc )
1015+ DO (int , INTEGER , NA_INTEGER , ansd [ansi ]= val ; ansi += inc )
9861016 } break ;
9871017 case REALSXP : if (INHERITS (x , char_integer64 )) {
9881018 #undef ISNAT
9891019 #define ISNAT (x ) ((x)==NA_INTEGER64)
990- int64_t * ansd = ( int64_t * ) REAL ( ans ); DO (int64_t , REAL , NA_INTEGER64 , ansd [ansi ]= val ; ansi += inc )
1020+ DO (int64_t , REAL , NA_INTEGER64 , ansd [ansi ]= val ; ansi += inc )
9911021 } else {
9921022 #undef ISNAT
9931023 #define ISNAT (x ) (ISNAN(x))
994- double * ansd = REAL ( ans ); DO (double , REAL , NA_REAL , ansd [ansi ]= val ; ansi += inc )
1024+ DO (double , REAL , NA_REAL , ansd [ansi ]= val ; ansi += inc )
9951025 } break ;
9961026 case CPLXSXP : {
9971027 #undef ISNAT
9981028 #define ISNAT (x ) (ISNAN_COMPLEX(x))
999- Rcomplex * ansd = COMPLEX ( ans ); DO (Rcomplex , COMPLEX , NA_CPLX , ansd [ansi ]= val ; ansi += inc )
1029+ DO (Rcomplex , COMPLEX , NA_CPLX , ansd [ansi ]= val ; ansi += inc )
10001030 } break ;
10011031 case STRSXP : {
10021032 #undef ISNAT
10031033 #define ISNAT (x ) ((x)==NA_STRING)
1004- DO (SEXP , STRING_PTR , NA_STRING , SET_STRING_ELT (ans ,ansi ,val ); ansi += inc ) } break ;
1034+ DO (SEXP , STRING_PTR , NA_STRING , SET_STRING_ELT (ans ,ansi ,val ); ansi += inc )
1035+ } break ;
10051036 case VECSXP : {
10061037 #undef ISNAT
10071038 #define ISNAT (x ) (isNull(x) || (isLogical(x) && LENGTH(x)==1 && LOGICAL(x)[0]==NA_LOGICAL))
1008- DO (SEXP , SEXPPTR_RO , ScalarLogical (NA_LOGICAL ), SET_VECTOR_ELT (ans ,ansi ,val ); ansi += inc ) } break ;
1039+ DO (SEXP , SEXPPTR_RO , ScalarLogical (NA_LOGICAL ), SET_VECTOR_ELT (ans ,ansi ,val ); ansi += inc ) /* global replace ScalarLogical() with fixed constant R_FalseValue somehow */
1040+ } break ;
10091041 default :
10101042 error (_ ("Type '%s' is not supported by GForce head/tail/first/last/`[`. Either add the namespace prefix (e.g. utils::head(.)) or turn off GForce optimization using options(datatable.optimize=1)" ), type2char (TYPEOF (x )));
10111043 }
0 commit comments