Skip to content

Commit 4a8030c

Browse files
committed
Rewrite Subsetter class; return proxy and allow subset assignment
1 parent 259f39d commit 4a8030c

File tree

5 files changed: 188 additions (+188) and 122 deletions (-122).

inst/include/Rcpp/traits/traits.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@
2323
#ifndef Rcpp__traits__traits__h
2424
#define Rcpp__traits__traits__h
2525

26+
namespace Rcpp {
namespace traits {

    /**
     * Wraps a type so it can be passed around as a (default-constructible)
     * value; the wrapped type is exposed as the nested typedef `type`.
     */
    template <typename T>
    struct identity {
        typedef T type;
    };

    /**
     * Turns a compile-time integer into a distinct type; the integer is
     * exposed as the nested enumerator `value`.
     */
    template <int I>
    struct int2type {
        enum { value = I };
    };

} // namespace traits
} // namespace Rcpp
37+
2638
#include <Rcpp/traits/integral_constant.h>
2739
#include <Rcpp/traits/same_type.h>
2840
#include <Rcpp/traits/is_wide_string.h>

inst/include/Rcpp/vector/Subsetter.h

Lines changed: 156 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -23,137 +23,175 @@
2323
#define Rcpp_vector_Subsetter_h_
2424

2525
namespace Rcpp {
26+
27+
template <
28+
int RTYPE, template <class> class StoragePolicy,
29+
int RHS_RTYPE, bool RHS_NA, typename RHS_T
30+
>
31+
class SubsetProxy {
32+
33+
typedef Vector<RTYPE, StoragePolicy> LHS_t;
34+
typedef Vector<RHS_RTYPE, StoragePolicy> RHS_t;
35+
36+
public:
2637

27-
template <int RTYPE, template <class> class StoragePolicy, typename T>
28-
class Subsetter {
29-
30-
typedef Vector<RTYPE, StoragePolicy> VECTOR;
31-
32-
public:
33-
34-
explicit Subsetter(const Subsetter& rhs): vec(rhs.vec), other(rhs.other) {};
35-
Subsetter(const VECTOR& vec_, const T& other_): vec(vec_), other(other_) {};
36-
37-
inline operator SEXP() const {
38-
return subset_impl(vec, other).get__();
39-
}
40-
41-
inline operator VECTOR() const {
42-
return subset_impl(vec, other);
43-
}
44-
45-
private:
46-
47-
Subsetter() {};
48-
49-
// helper function used for the subset methods when going from logical to int
50-
// operates like R's which, but returns NA when it encounters an NA
51-
template <template <class> class OtherStoragePolicy>
52-
static Vector<INTSXP, StoragePolicy> which_na( const Vector<LGLSXP, OtherStoragePolicy>& x) {
38+
SubsetProxy(LHS_t& lhs_, const RHS_t& rhs_):
39+
lhs(lhs_), rhs(rhs_), lhs_n(lhs.size()), rhs_n(rhs.size()) {
5340

54-
std::vector<int> output;
55-
int n = x.size();
56-
output.reserve(n);
41+
indices.reserve(rhs_n);
42+
get_indices( traits::identity< traits::int2type<RHS_RTYPE> >() );
43+
44+
}
45+
46+
SubsetProxy(const SubsetProxy& other):
47+
lhs(other.lhs),
48+
rhs(other.rhs),
49+
lhs_n(other.lhs_n),
50+
rhs_n(other.rhs_n),
51+
indices(other.indices) {}
52+
53+
// Enable e.g. x[y] = z
54+
template <int OtherRTYPE, template <class> class OtherStoragePolicy>
55+
SubsetProxy& operator=(const Vector<OtherRTYPE, OtherStoragePolicy>& other) {
56+
int n = other.size();
57+
if (indices.size() != n) stop("index error");
58+
if (n == 1) {
5759
for (int i=0; i < n; ++i) {
58-
if (x[i] == NA_LOGICAL) {
59-
output.push_back(NA_INTEGER);
60-
} else if (x[i]) {
61-
output.push_back(i);
62-
}
60+
lhs[ indices[i] ] = other[0];
6361
}
64-
int n_ = output.size();
65-
Vector<INTSXP, StoragePolicy> output_ = no_init(n_);
66-
for (int i=0; i < n_; ++i) {
67-
output_[i] = output[i];
68-
};
69-
return output_;
70-
}
71-
72-
// Subsetting for logicals
73-
template <template <class> class OtherStoragePolicy>
74-
inline Vector<RTYPE, StoragePolicy> subset_impl( const VECTOR& this_, const Vector<LGLSXP, OtherStoragePolicy>& x ) const {
75-
if (this_.size() != x.size()) {
76-
stop("subsetting with a LogicalVector requires both vectors to be of equal size");
62+
} else if (n == indices.size()) {
63+
for (int i=0; i < n; ++i) {
64+
lhs[ indices[i] ] = other[i];
7765
}
78-
Vector<INTSXP, StoragePolicy> tmp = which_na(x);
79-
if (!tmp.size()) return Vector<RTYPE, StoragePolicy>(0);
80-
else return subset_impl(this_, tmp);
66+
} else {
67+
stop("index error");
8168
}
82-
83-
// Subsetting for characters
84-
template <template <class> class OtherStoragePolicy>
85-
inline Vector<RTYPE, StoragePolicy> subset_impl( const VECTOR& this_, const Vector<STRSXP, OtherStoragePolicy>& x ) const {
86-
87-
if (Rf_isNull( Rf_getAttrib(this_, R_NamesSymbol) )) {
88-
stop("can't subset a nameless vector using a CharacterVector");
89-
}
90-
91-
Vector<STRSXP, StoragePolicy> names = as< Vector<STRSXP, StoragePolicy> >(Rf_getAttrib(this_, R_NamesSymbol));
92-
Vector<INTSXP, StoragePolicy> idx = match(x, names); // match returns 1-based index
93-
94-
// apparently, we don't see sugar, so we have to populate an (index - 1) manually
95-
Vector<INTSXP, StoragePolicy> idxm1 = no_init(idx.size());
96-
for (int i=0; i < idx.size(); ++i) {
97-
idxm1[i] = idx[i] - 1;
98-
}
99-
100-
Vector<RTYPE, StoragePolicy> output = subset_impl(this_, idxm1);
101-
int n = output.size();
102-
if (n == 0) return Vector<RTYPE, StoragePolicy>(0);
103-
Vector<STRSXP, StoragePolicy> out_names = no_init(n);
69+
return *this;
70+
}
71+
72+
// Enable e.g. x[y] = 1;
73+
// TODO: std::enable_if<primitive> with C++11
74+
SubsetProxy& operator=(double other) {
75+
int n = indices.size();
76+
for (int i=0; i < n; ++i) {
77+
lhs[ indices[i] ] = other;
78+
}
79+
return *this;
80+
}
81+
82+
SubsetProxy& operator=(int other) {
83+
int n = indices.size();
84+
for (int i=0; i < n; ++i) {
85+
lhs[ indices[i] ] = other;
86+
}
87+
return *this;
88+
}
89+
90+
SubsetProxy& operator=(const char* other) {
91+
int n = indices.size();
92+
for (int i=0; i < n; ++i) {
93+
lhs[ indices[i] ] = other;
94+
}
95+
return *this;
96+
}
97+
98+
SubsetProxy& operator=(bool other) {
99+
int n = indices.size();
100+
for (int i=0; i < n; ++i) {
101+
lhs[ indices[i] ] = other;
102+
}
103+
return *this;
104+
}
105+
106+
template <int OtherRTYPE, template <class> class OtherStoragePolicy>
107+
operator Vector<OtherRTYPE, OtherStoragePolicy>() const {
108+
int n = indices.size();
109+
Vector<OtherRTYPE, OtherStoragePolicy> output = no_init(n);
110+
for (int i=0; i < n; ++i) {
111+
output[i] = lhs[ indices[i] ];
112+
}
113+
SEXP names = Rf_getAttrib(lhs, R_NamesSymbol);
114+
if (!Rf_isNull(names)) {
115+
Shield<SEXP> out_names( Rf_allocVector(STRSXP, n) );
104116
for (int i=0; i < n; ++i) {
105-
out_names[i] = names[ idx[i] - 1 ];
117+
SET_STRING_ELT(out_names, i, STRING_ELT(names, indices[i]));
106118
}
107-
output.attr("names") = out_names;
108-
return output;
119+
Rf_setAttrib(output, R_NamesSymbol, out_names);
109120
}
110-
111-
// Subsetting for integers -- note that it is 0-based
112-
template <template <class> class OtherStoragePolicy>
113-
inline Vector<RTYPE, StoragePolicy>
114-
subset_impl( const VECTOR this_, const Vector<INTSXP, OtherStoragePolicy>& x ) const {
115-
int n = x.size();
116-
if (n == 0) return this_;
117-
Vector<RTYPE, StoragePolicy> output = no_init(n);
118-
for (int i=0; i < n; ++i) {
119-
if (x[i] == NA_INTEGER) output[i] = traits::get_na<RTYPE>();
120-
#ifndef RCPP_NO_BOUNDS_CHECK
121-
else if (x[i] < 0) stop("Index error: tried to index < 0");
122-
else if (x[i] > this_.size() - 1) stop("Index error: tried to index above vector size");
123-
#endif
124-
else output[i] = (this_)[ x[i] ];
121+
return output;
122+
}
123+
124+
private:
125+
126+
#ifndef RCPP_NO_BOUNDS_CHECK
127+
void check_indices(int* x, int n, int size) {
128+
for (int i=0; i < n; ++i) {
129+
if (x[i] < 0 or x[i] >= size) {
130+
stop("index error");
125131
}
132+
}
133+
}
134+
#else
135+
void check_indices(int* x, int n, int size) {}
136+
#endif
126137

127-
if (!Rf_isNull( Rf_getAttrib( this_, R_NamesSymbol) )) {
128-
129-
Vector<STRSXP, StoragePolicy> thisnames =
130-
as<Vector<STRSXP, StoragePolicy> >(Rf_getAttrib(this_, R_NamesSymbol));
131-
132-
Vector<STRSXP, StoragePolicy> outnames = no_init(n);
133-
for (int i=0; i < n; ++i) {
134-
if (x[i] == NA_INTEGER) outnames[i] = NA_STRING;
135-
#ifndef RCPP_NO_BOUNDS_CHECK
136-
else if (x[i] > this_.size() - 1) outnames[i] = NA_STRING;
137-
#endif
138-
else if (x[i] >= 0) outnames[i] = thisnames[ x[i] ];
139-
}
140-
output.attr("names") = outnames;
141-
}
142-
return wrap(output);
138+
void get_indices( traits::identity< traits::int2type<INTSXP> > t ) {
139+
int* ptr = INTEGER( rhs );
140+
check_indices(ptr, rhs_n, lhs_n);
141+
for (int i=0; i < rhs_n; ++i) {
142+
indices.push_back( ptr[i] );
143143
}
144-
145-
// Subsetting for numerics -- coerce to integer
146-
template <template <class> class OtherStoragePolicy>
147-
Vector<RTYPE, StoragePolicy>
148-
subset_impl( const VECTOR this_, const Vector<REALSXP, OtherStoragePolicy>& x ) const {
149-
return subset_impl(this_, as< Vector<INTSXP, OtherStoragePolicy> >(x) );
144+
}
145+
146+
void get_indices( traits::identity< traits::int2type<REALSXP> > t ) {
147+
Vector<INTSXP, StoragePolicy> tmp =
148+
as< Vector<INTSXP, StoragePolicy> >(rhs);
149+
int* ptr = INTEGER(tmp);
150+
check_indices(ptr, rhs_n, lhs_n);
151+
for (int i=0; i < rhs_n; ++i) {
152+
indices.push_back( tmp[i] );
150153
}
151-
152-
const VECTOR& vec;
153-
const T& other;
154-
155-
};
154+
}
155+
156+
void get_indices( traits::identity< traits::int2type<STRSXP> > t ) {
157+
SEXP names = Rf_getAttrib(lhs, R_NamesSymbol);
158+
if (Rf_isNull(names)) stop("names is null");
159+
for (int i=0; i < rhs_n; ++i) {
160+
indices.push_back( find(names, CHAR( STRING_ELT(rhs, i) )) );
161+
}
162+
}
163+
164+
int find(const RHS_t& names, const char* str) {
165+
for (int i=0; i < lhs_n; ++i) {
166+
if (strcmp( CHAR( STRING_ELT( names, i) ), str) == 0) return i;
167+
}
168+
stop("no name found");
169+
return -1;
170+
}
171+
172+
void get_indices( traits::identity< traits::int2type<LGLSXP> > t ) {
173+
if (lhs_n != rhs_n) {
174+
stop("logical subsetting requires vectors of identical size");
175+
}
176+
int* ptr = LOGICAL(rhs);
177+
for (int i=0; i < rhs_n; ++i) {
178+
if (ptr[i] == NA_INTEGER) {
179+
stop("can't subset using a logical vector with NAs");
180+
}
181+
if (ptr[i]) {
182+
indices.push_back(i);
183+
}
184+
}
185+
}
186+
187+
LHS_t& lhs;
188+
const RHS_t& rhs;
189+
int lhs_n;
190+
int rhs_n;
191+
std::vector<int> indices;
192+
193+
};
156194

157-
} // namespace Rcpp
195+
}
158196

159197
#endif

inst/include/Rcpp/vector/Vector.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,9 +314,13 @@ class Vector :
314314
}
315315

316316
// sugar subsetting requires dispatch on VectorBase
317-
template <int RTYPE2, bool na, typename T>
318-
Vector operator[](const VectorBase<RTYPE2, na, T>& rhs) const {
319-
return Subsetter<RTYPE, StoragePolicy, Vector<RTYPE2, PreserveStorage> >(*this, rhs);
317+
template <int RHS_RTYPE, bool RHS_NA, typename RHS_T>
318+
SubsetProxy<RTYPE, StoragePolicy, RHS_RTYPE, RHS_NA, RHS_T>
319+
operator[](const VectorBase<RHS_RTYPE, RHS_NA, RHS_T>& rhs) {
320+
return SubsetProxy<RTYPE, StoragePolicy, RHS_RTYPE, RHS_NA, RHS_T>(
321+
*this,
322+
rhs
323+
);
320324
}
321325

322326
Vector& sort(){

inst/unitTests/cpp/Subset.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,9 @@ NumericVector subset_test_greater_0(NumericVector x) {
4545
// Test fixture: indexes `x` with the string literal "foo" and returns the
// result as a List.
List subset_test_literal(List x) {
4646
return x["foo"];
4747
}
48+
49+
// [[Rcpp::export]]
50+
// Test fixture for subset assignment: assigns 0 to the elements of `x`
// selected by the expression `x > 0`, then returns `x`.
NumericVector subset_test_assign(NumericVector x) {
51+
x[ x > 0 ] = 0;
52+
return x;
53+
}

inst/unitTests/runit.subset.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ if (.runThisTest) {
4242
checkException( subset_test_char( c(1, 2, 3), 'a' ),
4343
"character subsetting -- no names on x")
4444

45-
lgcl <- c(TRUE, FALSE, NA, TRUE, TRUE)
45+
lgcl <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
4646
checkIdentical(
4747
x[lgcl],
4848
subset_test_lgcl(x, lgcl),
@@ -65,6 +65,12 @@ if (.runThisTest) {
6565
subset_test_greater_0(x),
6666
"sugar subsetting (x[x > 0])")
6767

68+
x <- as.numeric(-2:2)
69+
checkIdentical(
70+
c(-2, -1, 0, 0, 0),
71+
subset_test_assign(x)
72+
)
73+
6874
}
6975

7076
}

0 commit comments

Comments
 (0)