Skip to content

Commit 1904331

Browse files
committed
Added skeleton of batch-based GPU assignment
1 parent ae52796 commit 1904331

File tree

9 files changed

+220
-16
lines changed

9 files changed

+220
-16
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ set(XTENSOR_HEADERS
131131
${XTENSOR_INCLUDE_DIR}/xtensor/xcomplex.hpp
132132
${XTENSOR_INCLUDE_DIR}/xtensor/xcontainer.hpp
133133
${XTENSOR_INCLUDE_DIR}/xtensor/xcsv.hpp
134+
${XTENSOR_INCLUDE_DIR}/xtensor/xdevice.hpp
134135
${XTENSOR_INCLUDE_DIR}/xtensor/xdynamic_view.hpp
135136
${XTENSOR_INCLUDE_DIR}/xtensor/xeval.hpp
136137
${XTENSOR_INCLUDE_DIR}/xtensor/xexception.hpp

include/xtensor/xassign.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,17 @@ namespace xt
168168
static void run_impl(E1& e1, const E2& e2, std::false_type);
169169
};
170170

171+
// Assigner that routes an expression assignment through the device batch
// interface: the right-hand side is materialised as a device batch via
// load_device() and then written into the destination via store_device().
class device_assigner
{
public:

    template <class E1, class E2>
    static void run(E1& dst, const E2& src)
    {
        dst.store_device(src.load_device());
    }
};
181+
171182
/*************************
172183
* strided_loop_assigner *
173184
*************************/
@@ -463,7 +474,8 @@ namespace xt
463474
// in compilation error for expressions that do not provide a SIMD interface.
464475
// simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2)
465476
// is true.
466-
linear_assigner<simd_assign>::run(de1, de2);
477+
//linear_assigner<simd_assign>::run(de1, de2);
478+
device_assigner::run(de1, de2);
467479
}
468480
else
469481
{

include/xtensor/xcontainer.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "xstrides.hpp"
2828
#include "xtensor_config.hpp"
2929
#include "xtensor_forward.hpp"
30+
#include "xdevice.hpp"
3031

3132
namespace xt
3233
{
@@ -112,6 +113,8 @@ namespace xt
112113
using reverse_linear_iterator = typename iterable_base::reverse_linear_iterator;
113114
using const_reverse_linear_iterator = typename iterable_base::const_reverse_linear_iterator;
114115

116+
using container_device_return_type_t = host_device_batch<value_type>;
117+
115118
static_assert(static_layout != layout_type::any, "Container layout can never be layout_type::any!");
116119

117120
size_type size() const noexcept;
@@ -187,6 +190,19 @@ namespace xt
187190
container_simd_return_type_t<storage_type, value_type, requested_type>
188191
/*simd_return_type<requested_type>*/ load_simd(size_type i) const;
189192

193+
template<class device_batch>
194+
void store_device(device_batch&& e)
195+
{
196+
//check length matching
197+
e.store_host(storage().data());
198+
}
199+
200+
container_device_return_type_t load_device() const
201+
{
202+
auto ptr = data();
203+
return container_device_return_type_t(ptr, size());
204+
}
205+
190206
linear_iterator linear_begin() noexcept;
191207
linear_iterator linear_end() noexcept;
192208

include/xtensor/xdevice.hpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#ifndef XTENSOR_DEVICE_HPP
#define XTENSOR_DEVICE_HPP

#include <algorithm>
#include <cstddef>
#include <functional>
#include <memory>
#include <vector>

namespace xt{
namespace detail{

}
    /**
     * Device implementation for the various operations. All device specific code goes in here disabled via macro
     * for invalid syntax which might be needed for Sycl or CUDA.
     */
    //#ifdef XTENSOR_DEVICE_ASSIGN
    template<class T>
    class host_device_batch
    {
    public:
        // Copy `size` elements starting at `ptr` to the device.
        // Host implementation: a plain vector copy.
        // CUDA impl = nearly identical, with a thrust::device_vector.
        host_device_batch(const T* ptr, std::size_t size)
            : m_data(ptr, ptr + size)
        {
        }

        // Element-wise arithmetic. Each operator returns a NEW batch and
        // leaves both operands untouched; the previous skeleton mutated
        // *this and returned it, which is compound-assignment behaviour and
        // surprises any caller of a binary `+`/`-`/`*`/`/`.
        // `rhs` must hold at least as many elements as *this.
        // CUDA impl = thrust::transform(..., thrust::plus<T>{});
        template<class A>
        host_device_batch operator+(const host_device_batch<A>& rhs) const
        {
            return combine(rhs, std::plus<T>{});
        }
        template<class A>
        host_device_batch operator-(const host_device_batch<A>& rhs) const
        {
            return combine(rhs, std::minus<T>{});
        }
        template<class A>
        host_device_batch operator*(const host_device_batch<A>& rhs) const
        {
            return combine(rhs, std::multiplies<T>{});
        }
        template<class A>
        host_device_batch operator/(const host_device_batch<A>& rhs) const
        {
            return combine(rhs, std::divides<T>{});
        }
        // Copy the batch back to host memory; `dst` must have room for the
        // same number of elements the batch was constructed with.
        void store_host(T* dst)
        {
            std::copy(std::begin(m_data), std::end(m_data), dst);
        }
    private:
        // Allow access to m_data across instantiations (host_device_batch<A>
        // with A != T); without this, the mixed-type operators above would
        // fail to compile on the private member access.
        template<class>
        friend class host_device_batch;

        // Shared element-wise implementation for the four operators.
        template<class A, class F>
        host_device_batch combine(const host_device_batch<A>& rhs, F op) const
        {
            host_device_batch result(*this);
            std::transform(std::begin(result.m_data), std::end(result.m_data),
                           std::begin(rhs.m_data), std::begin(result.m_data), op);
            return result;
        }

        //CUDA impl = thrust::device_vector<T> m_data;
        std::vector<T> m_data;
    };
    //#endif

    // template<class T>
    // class cuda_device_batch : public batch<host_device_batch<T>>
    // {
    // public:

    // };

    // template<class T>
    // class intel_device_batch : public batch<host_device_batch<T>>
    // {
    // public:

    // };

    // template<class T>
    // class opencl_device_batch : public batch<host_device_batch<T>>
    // {
    // public:

    // };
}

#endif

include/xtensor/xfunction.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "xstrides.hpp"
3232
#include "xtensor_simd.hpp"
3333
#include "xutils.hpp"
34+
#include "xdevice.hpp"
3435

3536
namespace xt
3637
{
@@ -283,6 +284,7 @@ namespace xt
283284
using const_iterator = typename iterable_base::const_iterator;
284285
using reverse_iterator = typename iterable_base::reverse_iterator;
285286
using const_reverse_iterator = typename iterable_base::const_reverse_iterator;
287+
using device_return_type = host_device_batch<value_type>;
286288

287289
template <class Func, class... CTA, class U = std::enable_if_t<!std::is_base_of<std::decay_t<Func>, self_type>::value>>
288290
xfunction(Func&& f, CTA&&... e) noexcept;
@@ -361,6 +363,8 @@ namespace xt
361363
template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
362364
simd_return_type<requested_type> load_simd(size_type i) const;
363365

366+
device_return_type load_device() const;
367+
364368
const tuple_type& arguments() const noexcept;
365369

366370
const functor_type& functor() const noexcept;
@@ -385,6 +389,9 @@ namespace xt
385389
template <class align, class requested_type, std::size_t N, std::size_t... I>
386390
auto load_simd_impl(std::index_sequence<I...>, size_type i) const;
387391

392+
template <std::size_t... I>
393+
inline auto load_device_impl(std::index_sequence<I...>) const;
394+
388395
template <class Func, std::size_t... I>
389396
const_stepper build_stepper(Func&& f, std::index_sequence<I...>) const noexcept;
390397

@@ -844,6 +851,12 @@ namespace xt
844851
return operator()();
845852
}
846853

854+
// Evaluates this function expression as a device batch: expands one index
// per operand and defers to load_device_impl, which loads each operand's
// batch and applies the functor's device_apply.
template <class F, class... CT>
inline auto xfunction<F, CT...>::load_device() const -> device_return_type
{
    return load_device_impl(std::make_index_sequence<sizeof...(CT)>());
}
859+
847860
template <class F, class... CT>
848861
template <class align, class requested_type, std::size_t N>
849862
inline auto xfunction<F, CT...>::load_simd(size_type i) const -> simd_return_type<requested_type>
@@ -912,6 +925,13 @@ namespace xt
912925
return m_f.simd_apply((std::get<I>(m_e).template load_simd<align, requested_type>(i))...);
913926
}
914927

928+
// Loads the device batch of every operand in the argument tuple m_e and
// forwards them all to the functor's device_apply (one batch per operand).
template <class F, class... CT>
template <std::size_t... I>
inline auto xfunction<F, CT...>::load_device_impl(std::index_sequence<I...>) const
{
    return m_f.device_apply((std::get<I>(m_e).load_device())...);
}
934+
915935
template <class F, class... CT>
916936
template <class Func, std::size_t... I>
917937
inline auto xfunction<F, CT...>::build_stepper(Func&& f, std::index_sequence<I...>) const noexcept

include/xtensor/xmath.hpp

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,21 +81,27 @@ namespace xt
8181
XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long long);
8282

8383

84-
#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \
85-
struct NAME##_fun \
86-
{ \
87-
template <class T> \
88-
constexpr auto operator()(const T& arg) const \
89-
{ \
90-
using math::NAME; \
91-
return NAME(arg); \
92-
} \
93-
template <class B> \
94-
constexpr auto simd_apply(const B& arg) const \
95-
{ \
96-
using math::NAME; \
97-
return NAME(arg); \
98-
} \
84+
#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \
85+
struct NAME##_fun \
86+
{ \
87+
template <class T> \
88+
constexpr auto operator()(const T& arg) const \
89+
{ \
90+
using math::NAME; \
91+
return NAME(arg); \
92+
} \
93+
template <class B> \
94+
constexpr auto simd_apply(const B& arg) const \
95+
{ \
96+
using math::NAME; \
97+
return NAME(arg); \
98+
} \
99+
template <class B> \
100+
constexpr auto device_apply(const B& arg) const \
101+
{ \
102+
using math::NAME; \
103+
return NAME(arg); \
104+
} \
99105
}
100106

101107
#define XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(NAME) \
@@ -113,6 +119,12 @@ namespace xt
113119
using math::NAME; \
114120
return NAME(arg); \
115121
} \
122+
template <class B> \
123+
constexpr auto device_apply(const B& arg) const \
124+
{ \
125+
using math::NAME; \
126+
return NAME(arg); \
127+
} \
116128
}
117129

118130
#define XTENSOR_BINARY_MATH_FUNCTOR(NAME) \
@@ -130,6 +142,12 @@ namespace xt
130142
using math::NAME; \
131143
return NAME(arg1, arg2); \
132144
} \
145+
template <class B> \
146+
constexpr auto device_apply(const B& arg1, const B& arg2) const \
147+
{ \
148+
using math::NAME; \
149+
return NAME(arg1, arg2); \
150+
} \
133151
}
134152

135153
#define XTENSOR_TERNARY_MATH_FUNCTOR(NAME) \
@@ -147,6 +165,12 @@ namespace xt
147165
using math::NAME; \
148166
return NAME(arg1, arg2, arg3); \
149167
} \
168+
template <class B> \
169+
auto device_apply(const B& arg1, const B& arg2, const B& arg3) const \
170+
{ \
171+
using math::NAME; \
172+
return NAME(arg1, arg2, arg3); \
173+
} \
150174
}
151175

152176
namespace math

include/xtensor/xoperation.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ namespace xt
7979
{ \
8080
return (arg1 OP arg2); \
8181
} \
82+
template <class B> \
83+
constexpr auto device_apply(B&& arg1, const B&& arg2) const \
84+
{ \
85+
return (arg1 OP arg2); \
86+
} \
8287
}
8388

8489
namespace detail

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ set(XTENSOR_TESTS
183183
test_xcomplex.cpp
184184
test_xcsv.cpp
185185
test_xdatesupport.cpp
186+
test_xdevice_assign.cpp
186187
test_xdynamic_view.cpp
187188
test_xfunctor_adaptor.cpp
188189
test_xfixed.cpp

test/test_xdevice_assign.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
 * Copyright (c) QuantStack                                                 *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#include <algorithm>

#include "xtensor/xarray.hpp"
#include "xtensor/xfixed.hpp"
#include "xtensor/xnoalias.hpp"
#include "xtensor/xstrided_view.hpp"
#include "xtensor/xtensor.hpp"
#include "xtensor/xview.hpp"

#include "test_common_macros.hpp"

namespace xt
{
    // Smoke test for the device (batch) assignment path: assigning a simple
    // binary xfunction must produce the same values as scalar evaluation.
    TEST(test_xdevice, basic_xfunction)
    {
        std::vector<double> expectation = {2, 3, 4, 5, 6};

        xt::xarray<float> a = {1., 2., 3., 4., 5.};
        xt::xarray<float> b = xt::ones_like(a);
        auto c = xt::xtensor<float, 1>::from_shape(a.shape());
        c = a + b;
        for (std::size_t i = 0; i < expectation.size(); ++i)
        {
            ASSERT_EQ(c(i), expectation.at(i));
        }
    }
}

0 commit comments

Comments
 (0)