Skip to content

Commit 0851d10

Browse files
committed
Simplify nrow() getter for data.frame following code review
1 parent 486c452 commit 0851d10

File tree

5 files changed

+23
-64
lines changed

5 files changed

+23
-64
lines changed

ChangeLog

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
2025-12-22 Dirk Eddelbuettel <[email protected]>
2+
3+
* inst/include/Rcpp/DataFrame.h (nrow): Simplified per #1430 discussion
4+
relying on compact sequence also removing use of ATTRIB
5+
16
2025-12-21 Dirk Eddelbuettel <[email protected]>
27

3-
* inst/include/Rcpp/DataFrame.h (nrow): Rewritten using R_mapAttrib()
4-
avoiding ATTRIB() likely to be removed by R 4.6.0
5-
* inst/include/Rcpp/proxy/AttributeProxy.h (attributeNames): Idem
6-
* src/utilities.cpp: Add two helper functions used by R_mapAttrib()
7-
* inst/include/Rcpp/routines.h: Register two new helper functions
8+
* inst/include/Rcpp/proxy/AttributeProxy.h (attributeNames):
9+
Rewritten using R_mapAttrib() avoiding ATTRIB() to be removed in R 4.6.0
10+
* src/utilities.cpp: Add helper function used by R_mapAttrib()
11+
* inst/include/Rcpp/routines.h: Register new helper function
812
* src/rcpp_init.cpp (registerFunctions): Idem
913

1014
* .github/workflows/docker.yaml: Add workflow_dispatch, update

inst/include/Rcpp/DataFrame.h

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -61,36 +61,18 @@ namespace Rcpp{
6161
}
6262

6363
// By definition, the number of rows in a data.frame is contained
64-
// in its row.names attribute. If it has row names of the form 1:n,
65-
// they will be stored as {NA_INTEGER, -<nrow>}. Unfortunately,
66-
// getAttrib(df, R_RowNamesSymbol) will force an expansion of that
67-
// compact form thereby allocating a huge vector when we just want
68-
// the row.names. Hence this workaround.
69-
inline int nrow() const {
70-
#if R_VERSION >= R_Version(4, 6, 0)
71-
Shield<SEXP> v = R_mapAttrib(Parent::get__(), get_row_count, R_NilValue);
72-
if (v != NULL && TYPEOF(v) == INTSXP) {
73-
return INTEGER(v)[0];
74-
} else {
75-
// TODO: error?
76-
return NA_INTEGER;
77-
}
78-
#else
79-
SEXP rn = R_NilValue ;
80-
SEXP att = ATTRIB( Parent::get__() ) ;
81-
while( att != R_NilValue ){
82-
if( TAG(att) == R_RowNamesSymbol ) {
83-
rn = CAR(att) ;
84-
break ;
85-
}
86-
att = CDR(att) ;
87-
}
88-
if (Rf_isNull(rn))
89-
return 0;
90-
if (TYPEOF(rn) == INTSXP && LENGTH(rn) == 2 && INTEGER(rn)[0] == NA_INTEGER)
91-
return std::abs(INTEGER(rn)[1]);
92-
return LENGTH(rn);
93-
#endif
64+
// in its row.names attribute. Since R 3.5.0 this is returned as a
65+
// compact sequence from which we can just take the length
66+
// Shield<SEXP> rn = Rf_getAttrib(Parent::get__(), R_RowNamesSymbol);
67+
// return Rf_xlength(rn);
68+
// But as this makes an allocation an even simpler check on length as
69+
// discussed in #1430 is also possible and preferable. We also switch
70+
// to returning R_xlen_t which as upcast from int is safe
71+
inline R_xlen_t nrow() const {
72+
SEXP x = Parent::get__();
73+
// We can simplify: zero row DFs have no names, else length of all vector same
74+
// by design constraint so can use vector length for desired row count
75+
return (XLENGTH(x) == 0) ? 0 : XLENGTH(VECTOR_ELT(x, 0));
9476
}
9577

9678
template <typename T>
@@ -118,8 +100,8 @@ namespace Rcpp{
118100
}
119101

120102
// Offer multiple variants to accomodate both old interface here and signatures in other classes
121-
inline int nrows() const { return DataFrame_Impl::nrow(); }
122-
inline int rows() const { return DataFrame_Impl::nrow(); }
103+
inline R_xlen_t nrows() const { return DataFrame_Impl::nrow(); }
104+
inline R_xlen_t rows() const { return DataFrame_Impl::nrow(); }
123105

124106
inline R_xlen_t ncol() const { return DataFrame_Impl::length(); }
125107
inline R_xlen_t cols() const { return DataFrame_Impl::length(); }

inst/include/Rcpp/routines.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -317,13 +317,6 @@ inline attribute_hidden SEXP get_attr_names(SEXP tag, SEXP attr, void* data){
317317
return fun(tag, attr, data);
318318
}
319319

320-
inline attribute_hidden SEXP get_row_count(SEXP tag, SEXP attr, void* data){
321-
typedef SEXP (*Fun)(SEXP, SEXP, void*);
322-
static Fun fun = GET_CALLABLE("get_row_count");
323-
return fun(tag, attr, data);
324-
}
325-
326-
327320
#endif
328321

329322

src/rcpp_init.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ void registerFunctions(){
129129
RCPP_REGISTER(Rcpp_cerr_get)
130130

131131
RCPP_REGISTER(get_attr_names)
132-
RCPP_REGISTER(get_row_count)
133132
#undef RCPP_REGISTER
134133
}
135134

src/utilities.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,3 @@ SEXP get_attr_names(SEXP tag, SEXP attr, void* data) {
3434
vecptr->push_back(s);
3535
return NULL;
3636
}
37-
38-
// See WRE Section 6.22.6 'Working with attributes' under R 4.6.0 or later
39-
// This function extracts the number of rows in a data frame
40-
// It is used DataFrame_impl::nrow() in a call to R_mapAttrib()
41-
// [[Rcpp::register]]
42-
SEXP get_row_count(SEXP tag, SEXP attr, void* data) {
43-
if (tag == R_RowNamesSymbol) {
44-
if (TYPEOF(attr) == INTSXP && LENGTH(attr) == 2 && INTEGER(attr)[0] == NA_INTEGER) {
45-
int n = std::abs(INTEGER(attr)[1]);
46-
//Rcpp::Rcout << "Seeing " << n << std::endl;
47-
return Rf_ScalarInteger(n);
48-
}
49-
if (Rf_isNull(attr)) {
50-
return Rf_ScalarInteger(0);
51-
}
52-
return Rf_ScalarInteger(LENGTH(attr));
53-
}
54-
return NULL;
55-
}

0 commit comments

Comments
 (0)