Skip to content

Commit 0da7155

Browse files
committed
Throw exceptions rather than returning NAs on indexing out of bounds; allow unsafe subsetting if users really want it
1 parent 4f5d625 commit 0da7155

File tree

2 files changed

+33
-21
lines changed

2 files changed

+33
-21
lines changed

inst/include/Rcpp/vector/Subsetter.h

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,10 @@ namespace Rcpp {
4747
Subsetter() {};
4848

4949
// helper function used for the subset methods when going from logical to int
50+
// operates like R's which, but returns NA when it encounters an NA
5051
template <template <class> class OtherStoragePolicy>
5152
static Vector<INTSXP, StoragePolicy> which_na( const Vector<LGLSXP, OtherStoragePolicy>& x) {
53+
5254
std::vector<int> output;
5355
int n = x.size();
5456
output.reserve(n);
@@ -70,6 +72,9 @@ namespace Rcpp {
7072
// Subsetting for logicals
7173
template <template <class> class OtherStoragePolicy>
7274
inline Vector<RTYPE, StoragePolicy> subset_impl( const VECTOR& this_, const Vector<LGLSXP, OtherStoragePolicy>& x ) const {
75+
if (this_.size() != x.size()) {
76+
stop("unequal vector sizes");
77+
}
7378
Vector<INTSXP, StoragePolicy> tmp = which_na(x);
7479
if (!tmp.size()) return Vector<RTYPE, StoragePolicy>(0);
7580
else return subset_impl(this_, tmp);
@@ -78,40 +83,32 @@ namespace Rcpp {
7883
// Subsetting for characters
7984
template <template <class> class OtherStoragePolicy>
8085
inline Vector<RTYPE, StoragePolicy> subset_impl( const VECTOR& this_, const Vector<STRSXP, OtherStoragePolicy>& x ) const {
81-
86+
8287
if (Rf_isNull( Rf_getAttrib(this_, R_NamesSymbol) )) {
83-
int n = x.size();
84-
Vector<RTYPE, StoragePolicy> output(n);
85-
for (int i=0; i < n; ++i) {
86-
output[i] = traits::get_na<RTYPE>();
87-
}
88-
return output;
88+
stop("Tried to subset a vector with no names using a CharacterVector");
8989
}
9090

9191
Vector<STRSXP, StoragePolicy> names = as< Vector<STRSXP, StoragePolicy> >(Rf_getAttrib(this_, R_NamesSymbol));
9292
Vector<INTSXP, StoragePolicy> idx = match(x, names); // match returns 1-based index
93-
// apparently, we don't see sugar, so we have to do this manually
93+
94+
// apparently, we don't see sugar, so we have to populate an (index - 1) manually
9495
Vector<INTSXP, StoragePolicy> idxm1 = no_init(idx.size());
9596
for (int i=0; i < idx.size(); ++i) {
9697
idxm1[i] = idx[i] - 1;
9798
}
99+
98100
Vector<RTYPE, StoragePolicy> output = subset_impl(this_, idxm1);
99101
int n = output.size();
100102
if (n == 0) return Vector<RTYPE, StoragePolicy>(0);
101103
Vector<STRSXP, StoragePolicy> out_names = no_init(n);
102104
for (int i=0; i < n; ++i) {
103-
if (idx[i] == NA_INTEGER) out_names[i] = NA_STRING;
104-
else out_names[i] = names[ idx[i] - 1 ];
105+
out_names[i] = names[ idx[i] - 1 ];
105106
}
106107
output.attr("names") = out_names;
107108
return output;
108109
}
109110

110-
const VECTOR& vec;
111-
const T& other;
112-
113111
// Subsetting for integers -- note that it is 0-based
114-
// hidden until we decide whether to go with 0 or 1 based indexing
115112
template <template <class> class OtherStoragePolicy>
116113
inline Vector<RTYPE, StoragePolicy>
117114
subset_impl( const VECTOR this_, const Vector<INTSXP, OtherStoragePolicy>& x ) const {
@@ -120,8 +117,10 @@ namespace Rcpp {
120117
Vector<RTYPE, StoragePolicy> output = no_init(n);
121118
for (int i=0; i < n; ++i) {
122119
if (x[i] == NA_INTEGER) output[i] = traits::get_na<RTYPE>();
123-
else if (x[i] < 0) stop("Cannot subset with an integer vector with elements <= 0");
124-
else if (x[i] > this_.size() - 1) output[i] = traits::get_na<RTYPE>();
120+
#ifndef RCPP_NO_BOUNDS_CHECK
121+
else if (x[i] < 0) stop("Index error: tried to index < 0");
122+
else if (x[i] > this_.size() - 1) stop("Index error: tried to index above vector size");
123+
#endif
125124
else output[i] = (this_)[ x[i] ];
126125
}
127126

@@ -133,13 +132,25 @@ namespace Rcpp {
133132
Vector<STRSXP, StoragePolicy> outnames = no_init(n);
134133
for (int i=0; i < n; ++i) {
135134
if (x[i] == NA_INTEGER) outnames[i] = NA_STRING;
135+
#ifndef RCPP_NO_BOUNDS_CHECK
136136
else if (x[i] > this_.size() - 1) outnames[i] = NA_STRING;
137+
#endif
137138
else if (x[i] >= 0) outnames[i] = thisnames[ x[i] ];
138139
}
139140
output.attr("names") = outnames;
140141
}
141142
return wrap(output);
142143
}
144+
145+
// Subsetting for numerics -- coerce to integer
146+
template <template <class> class OtherStoragePolicy>
147+
Vector<RTYPE, StoragePolicy>
148+
subset_impl( const VECTOR this_, const Vector<REALSXP, OtherStoragePolicy>& x ) const {
149+
return subset_impl(this_, as< Vector<INTSXP, OtherStoragePolicy> >(x) );
150+
}
151+
152+
const VECTOR& vec;
153+
const T& other;
143154

144155
};
145156

inst/unitTests/runit.subset.R

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@ if (.runThisTest) {
2929
x <- rnorm(5)
3030
names(x) <- letters[1:5]
3131

32-
checkIdentical( x[c(1, 2, 10)], subset_test_int(x, c(0L, 1L, 9L)),
33-
"integer subsetting")
32+
checkIdentical( x[c(1, 2, 3)], subset_test_int(x, c(0L, 1L, 2L)) )
33+
checkException( subset_test_int(x, -1L) )
34+
checkException( subset_test_int(x, length(x)) )
3435

3536
checkIdentical( x[ c('b', 'a') ], subset_test_char(x, c('b', 'a')),
3637
"character subsetting")
3738

38-
checkIdentical( c(1, 2, 3)['a'], subset_test_char( c(1, 2, 3), 'a' ),
39+
checkException( subset_test_char( c(1, 2, 3), 'a' ),
3940
"character subsetting -- no names on x")
4041

4142
lgcl <- c(TRUE, FALSE, NA, TRUE, TRUE)
@@ -52,8 +53,8 @@ if (.runThisTest) {
5253

5354
l <- as.list(x)
5455
checkIdentical(
55-
l[c('b', 'Q')],
56-
subset_test_list(x, c('b', 'Q')),
56+
l[c('b', 'c')],
57+
subset_test_list(x, c('b', 'c')),
5758
"list subsetting")
5859

5960
checkIdentical(

0 commit comments

Comments
 (0)