-
Notifications
You must be signed in to change notification settings - Fork 15
Description
So, I've been debugging an issue all morning, and finally think I have a grasp of what's happening. It's not a fun one, so be warned.
The short version is:
I think when you assign the result of xt::reduce() back to an xt::rarray, something goes wrong, but if you assign it back to xt::xarray then coerce that to xt::rarray everything is fine. This only seems to show up when reducing over large-ish (>2500 elements) matrices.
The long version
I'm trying (and successfully!) to expose the "custom reducer" idea to the R user, so they can do something like rray_reducer(x, function(.x, .y) {.x + .y}) to get a reduced sum over a specified axis. It would be great because you can then use any R function as the reducer function (probably with some performance loss because you have to call back to R repeatedly from cpp, but im okay with that for now).
It works for small matrices (or maybe im getting lucky) but when I up the size to ~2500 elements or more, things fall apart.
The only way I can think to give you a reproducible example is to make a standalone cpp file that you can use Rcpp::sourceFile() on...so...I'm sorry about that.
This version goes to xarray first, then to rarray. It seems to work fine. In the rray_custom_reducer_impl function, with the comment // THIS IS THE PROBLEM LINE! is the section that is causing the issue. It works fine in this example because of the conversion first to xarray.
// [[Rcpp::depends(xtensorrr)]]
// [[Rcpp::plugins(cpp14)]]
#include <xtensor-r/rarray.hpp>
#include <xtensor/xreducer.hpp>
#include <xtensor/xio.hpp>
#include <xtensor/xarray.hpp>
#include <Rcpp.h>
using namespace Rcpp;
// How does this work?
// 1) rray_custom_reducer_cpp() takes x, a function with 2 args, the axes, and the return type
// 2) Based on the types of x and the return type, it calls something along the lines of:
// rray_custom_reducer_impl<TYPEOFX, RETURNTYPE>(x, f, axes);
// 3) rray_custom_reducer_impl
// - coerces x to an rarray
// - creates a binary_functor using f and all of the type info
// - then makes an xreducer_functor out of that and calls reduce using it
// 4) binary_functor this is where the magic happens
// it lets you use scoping so you can initialize it with "things" you'd like
// it to have available when the function is called (by the overloaded () operator)
// - So we give it the function and the SEXPTYPE return type in the constructor
// - And then we template on the Cpp element and return type
// - Each call to the overloaded () operator calls f and coerces the result
// to RET_T if it can be, otherwise error
// Functor allows a kind of scoping so r_functor(left, right)
// internally knows about the R function f
template <typename ELEM_T, typename RET_T>
class binary_functor {
private:
Rcpp::Function f;
SEXPTYPE RTYPE;
R_xlen_t good_length = 1;
public:
binary_functor(Rcpp::Function f_, SEXPTYPE RTYPE_) : f(f_), RTYPE(RTYPE_) { }
// Also pulling the first element of the result, even if it had more than
// one thing. Should do checking instead.
RET_T operator () (const ELEM_T& x, const ELEM_T& y) const {
//std::cout << "x: " << x << std::endl;
//std::cout << "y: " << y << std::endl;
SEXP f_res = f(x, y);
//Rcpp::print(f_res);
// Type check
if (TYPEOF(f_res) != RTYPE) {
const char* good_type = Rf_type2char(RTYPE);
const char* bad_type = Rf_type2char(TYPEOF(f_res));
Rf_errorcall(
R_NilValue,
"`.f` should return length 1 objects of class `%s`, not class `%s`.",
good_type,
bad_type
);
}
// Length check
if (Rf_xlength(f_res) != good_length) {
Rf_errorcall(
R_NilValue,
"`.f` should return objects of length 1, not %i.",
Rf_xlength(f_res)
);
}
// Clever conversion to replace explicit use of
// REAL() or INTEGER()
RET_T res = ((RET_T *)DATAPTR(f_res))[0];
//std::cout << "res: " << res << std::endl;
return(res);
}
};
template <typename ELEM_R_T, typename RET_R_T>
SEXP rray_custom_reducer_impl(SEXP x, Rcpp::Function f, std::vector<std::size_t> axes) {
// Underlying Cpp type. This helper is necessary because it converts
// rlogical->int
using elem_t = xt::r_detail::get_underlying_value_type_r<ELEM_R_T>;
using ret_t = xt::r_detail::get_underlying_value_type_r<RET_R_T>;
// Pull the SEXPTYPE of the return value from the cpp type
// The helper is necessary because rlogical->LGLSXP has been specially registered
static constexpr int ret_sexptype = Rcpp::traits::r_sexptype_traits<RET_R_T>::rtype;
// Create the rarray with the type matching x
const xt::rarray<ELEM_R_T>& x_rray = xt::rarray<ELEM_R_T>(x);
// Create the functor
auto r_functor = binary_functor<typename elem_t::type, typename ret_t::type>(f, ret_sexptype);
auto xr_functor = xt::make_xreducer_functor(r_functor);
// THIS IS THE PROBLEM LINE!
xt::xarray<typename ret_t::type> res = xt::reduce(xr_functor, x_rray, axes);
xt::rarray<RET_R_T> res2 = res;
std::cout << "xarray: " << res << std::endl;
std::cout << "rarray: " << res2 << std::endl;
return(res2);
}
// Helper for switching on the string op
constexpr unsigned int str2int(const char* str, int h = 0) {
return !str[h] ? 5381 : (str2int(str, h+1) * 33) ^ str[h];
}
// [[Rcpp::export]]
SEXP rray_custom_reducer_cpp(SEXP x, Rcpp::Function f, std::vector<std::size_t> axes, SEXP type_) {
// Collect the char from the string type ("double", "integer", "logical")
const char* type = CHAR(Rf_asChar(type_));
// Switch on X
switch(TYPEOF(x)) {
// This means ELEM_T = double
case REALSXP: {
// Switch on return type:
// "double" = REALSXP
// "integer" = INTSXP
// "logical" = LGLSXP
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<double, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<double, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<double, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
// This means ELEM_T = int
case INTSXP: {
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<int, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<int, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<int, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
// This means ELEM_T = int
case LGLSXP: {
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<rlogical, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<rlogical, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<rlogical, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
Correct results:
Rcpp::sourceCpp("~/Desktop/test.cpp")
x <- matrix(as.double(1:5000), ncol = 2)
fn <- function(.x, .y) {.x + .y}
rray_custom_reducer_cpp(x, fn, 0L, "double")
#> [1] 3126250 9376250standard output and standard error
xarray: { 3126250., 9376250.}
rarray: { 3126250., 9376250.}Created on 2018-12-09 by the reprex package (v0.2.1.9000)
This is the not working version, even though I think it should work. It's more...unstable...than actually not working as the rarray result has variable output. The only thing I changed is that problem line, and now I just return 1 because I don't trust the output. But a key thing to look at here is the std output, where I've printed resulting rarray objects from the two calls to the same function. I had to try and run it multiple times because sometimes it just crashes...
// [[Rcpp::depends(xtensorrr)]]
// [[Rcpp::plugins(cpp14)]]
#include <xtensor-r/rarray.hpp>
#include <xtensor/xreducer.hpp>
#include <xtensor/xio.hpp>
#include <xtensor/xarray.hpp>
#include <Rcpp.h>
using namespace Rcpp;
// How does this work?
// 1) rray_custom_reducer_cpp() takes x, a function with 2 args, the axes, and the return type
// 2) Based on the types of x and the return type, it calls something along the lines of:
// rray_custom_reducer_impl<TYPEOFX, RETURNTYPE>(x, f, axes);
// 3) rray_custom_reducer_impl
// - coerces x to an rarray
// - creates a binary_functor using f and all of the type info
// - then makes an xreducer_functor out of that and calls reduce using it
// 4) binary_functor this is where the magic happens
// it lets you use scoping so you can initialize it with "things" you'd like
// it to have available when the function is called (by the overloaded () operator)
// - So we give it the function and the SEXPTYPE return type in the constructor
// - And then we template on the Cpp element and return type
// - Each call to the overloaded () operator calls f and coerces the result
// to RET_T if it can be, otherwise error
// Functor allows a kind of scoping so r_functor(left, right)
// internally knows about the R function f
template <typename ELEM_T, typename RET_T>
class binary_functor {
private:
Rcpp::Function f;
SEXPTYPE RTYPE;
R_xlen_t good_length = 1;
public:
binary_functor(Rcpp::Function f_, SEXPTYPE RTYPE_) : f(f_), RTYPE(RTYPE_) { }
// Also pulling the first element of the result, even if it had more than
// one thing. Should do checking instead.
RET_T operator () (const ELEM_T& x, const ELEM_T& y) const {
//std::cout << "x: " << x << std::endl;
//std::cout << "y: " << y << std::endl;
SEXP f_res = f(x, y);
//Rcpp::print(f_res);
// Type check
if (TYPEOF(f_res) != RTYPE) {
const char* good_type = Rf_type2char(RTYPE);
const char* bad_type = Rf_type2char(TYPEOF(f_res));
Rf_errorcall(
R_NilValue,
"`.f` should return length 1 objects of class `%s`, not class `%s`.",
good_type,
bad_type
);
}
// Length check
if (Rf_xlength(f_res) != good_length) {
Rf_errorcall(
R_NilValue,
"`.f` should return objects of length 1, not %i.",
Rf_xlength(f_res)
);
}
// Clever conversion to replace explicit use of
// REAL() or INTEGER()
RET_T res = ((RET_T *)DATAPTR(f_res))[0];
//std::cout << "res: " << res << std::endl;
return(res);
}
};
template <typename ELEM_R_T, typename RET_R_T>
SEXP rray_custom_reducer_impl(SEXP x, Rcpp::Function f, std::vector<std::size_t> axes) {
// Underlying Cpp type. This helper is necessary because it converts
// rlogical->int
using elem_t = xt::r_detail::get_underlying_value_type_r<ELEM_R_T>;
using ret_t = xt::r_detail::get_underlying_value_type_r<RET_R_T>;
// Pull the SEXPTYPE of the return value from the cpp type
// The helper is necessary because rlogical->LGLSXP has been specially registered
static constexpr int ret_sexptype = Rcpp::traits::r_sexptype_traits<RET_R_T>::rtype;
// Create the rarray with the type matching x
const xt::rarray<ELEM_R_T>& x_rray = xt::rarray<ELEM_R_T>(x);
// Create the functor
auto r_functor = binary_functor<typename elem_t::type, typename ret_t::type>(f, ret_sexptype);
auto xr_functor = xt::make_xreducer_functor(r_functor);
// THIS IS THE PROBLEM LINE!
xt::rarray<RET_R_T> res = xt::reduce(xr_functor, x_rray, axes);
std::cout << "rarray: " << res << std::endl;
return(Rf_ScalarInteger(1));
}
// Helper for switching on the string op
constexpr unsigned int str2int(const char* str, int h = 0) {
return !str[h] ? 5381 : (str2int(str, h+1) * 33) ^ str[h];
}
// [[Rcpp::export]]
SEXP rray_custom_reducer_cpp2(SEXP x, Rcpp::Function f, std::vector<std::size_t> axes, SEXP type_) {
// Collect the char from the string type ("double", "integer", "logical")
const char* type = CHAR(Rf_asChar(type_));
// Switch on X
switch(TYPEOF(x)) {
// This means ELEM_T = double
case REALSXP: {
// Switch on return type:
// "double" = REALSXP
// "integer" = INTSXP
// "logical" = LGLSXP
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<double, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<double, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<double, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
// This means ELEM_T = int
case INTSXP: {
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<int, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<int, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<int, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
// This means ELEM_T = int
case LGLSXP: {
switch(str2int(type)) {
case str2int("double"): {
return rray_custom_reducer_impl<rlogical, double>(x, f, axes);
}
case str2int("integer"): {
return rray_custom_reducer_impl<rlogical, int>(x, f, axes);
}
case str2int("logical"): {
return rray_custom_reducer_impl<rlogical, rlogical>(x, f, axes);
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
default: {
Rcpp::stop("Incompatible SEXP encountered; only accepts doubles, integers, and logicals.");
}
}
}
Sometimes this will crash on you, sometimes it will just give odd results...
Rcpp::sourceCpp("~/Desktop/test2.cpp")
x <- matrix(as.double(1:5000), ncol = 2)
fn <- function(.x, .y) {.x + .y}
rray_custom_reducer_cpp2(x, fn, 0L, "double")
#> [1] 1
rray_custom_reducer_cpp2(x, fn, 0L, "double")
#> [1] 1standard output and standard error
rarray: { 3126250.}
rarray: { 6.938981e-310}Created on 2018-12-09 by the reprex package (v0.2.1.9000)