Skip to content

Commit 458b6ab

Browse files
jonkeanepaleolimbot
authored andcommitted
GH-45949: [R] Fix CRAN warnings for 19.0.1 about compiled code (#45951)
This gets rid of `OBJECT`, `DATAPTR` has been replaced with `INTEGER()`, `REAL()`, etc. though strings are more complicated. I will fully admit that this C++ is stretching my comfort zone, so might include obviously wrong things! CI is currently failing, but I'm not totally sure yet if that means the code changes here are wrong or if maybe these allow us to have slightly different assumptions about materialization (see #45951 (comment)) I've also requested reviews broadly for folks I know have been around this code before, I appreciate any effort that y'all can spare 🙏 * GitHub Issue: #45949 Lead-authored-by: Jonathan Keane <[email protected]> Co-authored-by: Dewey Dunnington <[email protected]> Signed-off-by: Jonathan Keane <[email protected]>
1 parent aa6c0c0 commit 458b6ab

File tree

4 files changed

+66
-32
lines changed

4 files changed

+66
-32
lines changed

r/src/altrep.cpp

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ const std::shared_ptr<ChunkedArray>& GetChunkedArray(SEXP alt) {
115115
// materialization is needed.
116116
// data2: starts as NULL, and becomes a standard R vector with the same
117117
// data if necessary: if materialization is needed, e.g. if we need
118-
// to access its data pointer, with DATAPTR().
118+
// to access its data pointer, with INTEGER(), REAL(), etc.
119119
template <typename Impl>
120120
struct AltrepVectorBase {
121121
// store the Array as an external pointer in data1, mark as immutable
@@ -220,7 +220,14 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
220220
SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));
221221

222222
// copy the data from the array, through Get_region
223-
Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
223+
if constexpr (std::is_same_v<c_type, double>) {
224+
Get_region(alt, 0, size, REAL(copy));
225+
} else if constexpr (std::is_same_v<c_type, int>) {
226+
Get_region(alt, 0, size, INTEGER(copy));
227+
} else {
228+
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
229+
"ALTREP not implemented for this c_type");
230+
}
224231

225232
// store as data2, this is now considered materialized
226233
SetRepresentation(alt, copy);
@@ -269,13 +276,27 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
269276
}
270277

271278
// Otherwise we have to materialize and hand the pointer to data2
272-
return DATAPTR(Materialize(alt));
279+
if constexpr (std::is_same_v<c_type, double>) {
280+
return REAL(Materialize(alt));
281+
} else if constexpr (std::is_same_v<c_type, int>) {
282+
return INTEGER(Materialize(alt));
283+
} else {
284+
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
285+
"ALTREP not implemented for this c_type");
286+
}
273287
}
274288

275289
// The value at position i
276290
static c_type Elt(SEXP alt, R_xlen_t i) {
277291
if (IsMaterialized(alt)) {
278-
return reinterpret_cast<c_type*>(DATAPTR(Representation(alt)))[i];
292+
if constexpr (std::is_same_v<c_type, double>) {
293+
return REAL(Representation(alt))[i];
294+
} else if constexpr (std::is_same_v<c_type, int>) {
295+
return INTEGER(Representation(alt))[i];
296+
} else {
297+
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
298+
"ALTREP not implemented for this c_type");
299+
}
279300
}
280301

281302
auto altrep_data =
@@ -531,7 +552,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
531552
SEXP copy = PROTECT(Rf_allocVector(INTSXP, size));
532553

533554
// copy the data from the array, through Get_region
534-
Get_region(alt, 0, size, reinterpret_cast<int*>(DATAPTR(copy)));
555+
Get_region(alt, 0, size, INTEGER(copy));
535556

536557
// store as data2, this is now considered materialized
537558
SetRepresentation(alt, copy);
@@ -552,7 +573,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
552573
return nullptr;
553574
}
554575

555-
static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); }
576+
static void* Dataptr(SEXP alt, Rboolean writeable) { return INTEGER(Materialize(alt)); }
556577

557578
static SEXP Duplicate(SEXP alt, Rboolean /* deep */) {
558579
// the representation integer vector
@@ -892,7 +913,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
892913
return s;
893914
}
894915

895-
static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); }
916+
static void* Dataptr(SEXP alt, Rboolean writeable) {
917+
return const_cast<void*>(DATAPTR_RO(Materialize(alt)));
918+
}
896919

897920
static SEXP Materialize(SEXP alt) {
898921
if (Base::IsMaterialized(alt)) {
@@ -931,7 +954,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
931954
}
932955

933956
static const void* Dataptr_or_null(SEXP alt) {
934-
if (Base::IsMaterialized(alt)) return DATAPTR(Representation(alt));
957+
if (Base::IsMaterialized(alt)) {
958+
return DATAPTR_RO(alt);
959+
}
935960

936961
// otherwise give up
937962
return nullptr;
@@ -1267,21 +1292,14 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {
12671292

12681293
if (TYPEOF(x) == INTSXP) {
12691294
cpp11::writable::integers out(Rf_xlength(x));
1270-
int* ptr = reinterpret_cast<int*>(DATAPTR(x));
1295+
int* ptr = INTEGER(x);
12711296
for (R_xlen_t i = 0; i < n; i++) {
12721297
out[i] = ptr[i];
12731298
}
12741299
return out;
12751300
} else if (TYPEOF(x) == REALSXP) {
12761301
cpp11::writable::doubles out(Rf_xlength(x));
1277-
double* ptr = reinterpret_cast<double*>(DATAPTR(x));
1278-
for (R_xlen_t i = 0; i < n; i++) {
1279-
out[i] = ptr[i];
1280-
}
1281-
return out;
1282-
} else if (TYPEOF(x) == STRSXP) {
1283-
cpp11::writable::strings out(Rf_xlength(x));
1284-
SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x));
1302+
double* ptr = REAL(x);
12851303
for (R_xlen_t i = 0; i < n; i++) {
12861304
out[i] = ptr[i];
12871305
}

r/src/arrow_types.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,32 @@ template <typename RVector>
173173
class RBuffer : public MutableBuffer {
174174
public:
175175
explicit RBuffer(RVector vec)
176-
: MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
176+
: MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)),
177177
vec.size() * sizeof(typename RVector::value_type),
178178
arrow::CPUDevice::memory_manager(gc_memory_pool())),
179179
vec_(vec) {}
180180

181181
private:
182182
// vec_ holds the memory
183183
RVector vec_;
184+
185+
static void* getDataPointer(RVector& vec) {
186+
if (TYPEOF(vec) == LGLSXP) {
187+
return LOGICAL(vec);
188+
} else if (TYPEOF(vec) == INTSXP) {
189+
return INTEGER(vec);
190+
} else if (TYPEOF(vec) == REALSXP) {
191+
return REAL(vec);
192+
} else if (TYPEOF(vec) == CPLXSXP) {
193+
return COMPLEX(vec);
194+
} else if (TYPEOF(vec) == STRSXP) {
195+
// We don't want to expose the string data here, so we error
196+
cpp11::stop("Operation not supported for string vectors.");
197+
} else {
198+
// raw
199+
return RAW(vec);
200+
}
201+
}
184202
};
185203

186204
std::shared_ptr<arrow::DataType> InferArrowTypeFromFactor(SEXP);

r/src/r_to_arrow.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,11 +1214,11 @@ bool can_reuse_memory(SEXP x, const std::shared_ptr<arrow::DataType>& type) {
12141214
// because MakeSimpleArray below will force materialization
12151215
switch (type->id()) {
12161216
case Type::INT32:
1217-
return TYPEOF(x) == INTSXP && !OBJECT(x);
1217+
return TYPEOF(x) == INTSXP && !Rf_isObject(x);
12181218
case Type::DOUBLE:
1219-
return TYPEOF(x) == REALSXP && !OBJECT(x);
1219+
return TYPEOF(x) == REALSXP && !Rf_isObject(x);
12201220
case Type::INT8:
1221-
return TYPEOF(x) == RAWSXP && !OBJECT(x);
1221+
return TYPEOF(x) == RAWSXP && !Rf_isObject(x);
12221222
case Type::INT64:
12231223
return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64");
12241224
default:
@@ -1412,17 +1412,17 @@ bool vector_from_r_memory(SEXP x, const std::shared_ptr<DataType>& type,
14121412

14131413
switch (type->id()) {
14141414
case Type::INT32:
1415-
return TYPEOF(x) == INTSXP && !OBJECT(x) &&
1415+
return TYPEOF(x) == INTSXP && !Rf_isObject(x) &&
14161416
vector_from_r_memory_impl<cpp11::integers, Int32Type>(x, type, columns, j,
14171417
tasks);
14181418

14191419
case Type::DOUBLE:
1420-
return TYPEOF(x) == REALSXP && !OBJECT(x) &&
1420+
return TYPEOF(x) == REALSXP && !Rf_isObject(x) &&
14211421
vector_from_r_memory_impl<cpp11::doubles, DoubleType>(x, type, columns, j,
14221422
tasks);
14231423

14241424
case Type::UINT8:
1425-
return TYPEOF(x) == RAWSXP && !OBJECT(x) &&
1425+
return TYPEOF(x) == RAWSXP && !Rf_isObject(x) &&
14261426
vector_from_r_memory_impl<cpp11::raws, UInt8Type>(x, type, columns, j,
14271427
tasks);
14281428

r/tests/testthat/test-altrep.R

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with nulls", {
170170
expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
171171
expect_false(test_arrow_altrep_is_materialized(altrep))
172172

173-
# because there are no nulls, DATAPTR() does not materialize
173+
# because there are nulls, DATAPTR() does materialize
174174
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
175175
expect_true(test_arrow_altrep_is_materialized(altrep))
176176

@@ -193,7 +193,7 @@ test_that("element access methods for double ALTREP with nulls", {
193193
expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
194194
expect_false(test_arrow_altrep_is_materialized(altrep))
195195

196-
# because there are no nulls, DATAPTR() does not materialize
196+
# because there are nulls, DATAPTR() does materialize
197197
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
198198
expect_true(test_arrow_altrep_is_materialized(altrep))
199199

@@ -244,14 +244,13 @@ test_that("element access methods for character ALTREP", {
244244
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
245245
expect_false(test_arrow_altrep_is_materialized(altrep))
246246

247-
# DATAPTR() should always materialize for strings
248-
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
247+
# match() calls DATAPTR() internally which materializes the vector
248+
match(altrep, c("1", "40", "999"))
249249
expect_true(test_arrow_altrep_is_materialized(altrep))
250250

251251
# test element access after materialization
252252
expect_true(test_arrow_altrep_is_materialized(altrep))
253253
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
254-
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
255254
})
256255

257256
test_that("element access methods for character ALTREP from large_utf8()", {
@@ -265,14 +264,13 @@ test_that("element access methods for character ALTREP from large_utf8()", {
265264
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
266265
expect_false(test_arrow_altrep_is_materialized(altrep))
267266

268-
# DATAPTR() should always materialize for strings
269-
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
267+
# match() calls DATAPTR() internally which materializes the vector
268+
match(altrep, c("1", "40", "999"))
270269
expect_true(test_arrow_altrep_is_materialized(altrep))
271270

272271
# test element access after materialization
273272
expect_true(test_arrow_altrep_is_materialized(altrep))
274273
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
275-
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
276274
})
277275

278276
test_that("empty vectors are not altrep", {

0 commit comments

Comments
 (0)