#include <AMReX_Arena.H>
#include <AMReX_GpuContainers.H>
#include <AMReX_GpuDevice.H>
#include <AMReX_TypeTraits.H>

#include <cstdlib>
#include <cstring>
#include <initializer_list>
@@ -17,87 +18,150 @@ class Buffer
1718{
1819public:
1920
20- Buffer (std::initializer_list<T> init)
21- : m_size(init.size())
22- {
23- if (m_size == 0 ) { return ; }
24- #ifdef AMREX_USE_GPU
25- h_data = static_cast <T*>(The_Pinned_Arena ()->alloc (m_size*sizeof (T)));
26- #else
27- h_data = static_cast <T*>(std::malloc (m_size*sizeof (T)));
28- #endif
29- std::memcpy (h_data, init.begin (), m_size*sizeof (T));
30- #ifdef AMREX_USE_GPU
31- if (Gpu::inLaunchRegion ())
32- {
33- d_data = static_cast <T*>(The_Arena ()->alloc (m_size*sizeof (T)));
34- Gpu::htod_memcpy_async (d_data, h_data, m_size*sizeof (T));
21+ Buffer (std::initializer_list<T> init) {
22+ resize (init.size ());
23+
24+ if (init.size () > 0 ) {
25+ std::memcpy (h_vect.data (), init.begin (), init.size ()*sizeof (T));
26+ copyToDeviceAsync ();
3527 }
36- #endif
3728 }
3829
39- Buffer (T const * h_p, const std::size_t n)
40- : m_size(n)
41- {
42- if (m_size == 0 ) { return ; }
43- #ifdef AMREX_USE_GPU
44- h_data = static_cast <T*>(The_Pinned_Arena ()->alloc (m_size*sizeof (T)));
45- #else
46- h_data = static_cast <T*>(std::malloc (m_size*sizeof (T)));
47- #endif
48- std::memcpy (h_data, h_p, m_size*sizeof (T));
49- #ifdef AMREX_USE_GPU
50- if (Gpu::inLaunchRegion ())
51- {
52- d_data = static_cast <T*>(The_Arena ()->alloc (m_size*sizeof (T)));
53- Gpu::htod_memcpy_async (d_data, h_data, m_size*sizeof (T));
30+ Buffer (T const * h_p, const std::size_t n) {
31+ resize (n);
32+
33+ if (n > 0 && h_p != nullptr ) {
34+ std::memcpy (h_vect.data (), h_p, n*sizeof (T));
35+ copyToDeviceAsync ();
5436 }
55- #endif
5637 }
5738
58- ~Buffer () { clear (); }
39+ Buffer (const std::size_t n) {
40+ resize (n);
41+ }
5942
60- Buffer (Buffer const &) = delete ;
61- Buffer (Buffer &&) = delete ;
62- void operator = (Buffer const &) = delete ;
63- void operator = (Buffer &&) = delete ;
43+ Buffer () = default ;
6444
65- [[nodiscard]] T const * data () const noexcept { return (d_data != nullptr ) ? d_data : h_data; }
66- [[nodiscard]] T* data () noexcept { return (d_data != nullptr ) ? d_data : h_data; }
45+ [[nodiscard]] T const * data () const noexcept {
46+ return (useDVect () && !d_vect.empty ()) ? d_vect.data () : h_vect.data ();
47+ }
48+ [[nodiscard]] T* data () noexcept {
49+ return (useDVect () && !d_vect.empty ()) ? d_vect.data () : h_vect.data ();
50+ }
6751
68- [[nodiscard]] T const * hostData () const noexcept { return h_data; }
69- [[nodiscard]] T* hostData () noexcept { return h_data; }
52+ [[nodiscard]] T const * hostData () const noexcept { return h_vect.data (); }
53+ [[nodiscard]] T* hostData () noexcept { return h_vect.data (); }
54+
55+ /* *
56+ * \brief Changes the value of an element of the host (CPU) vector.
57+ * Does not update the device (GPU) vector, so copyToDeviceAsync()
58+ * needs to be called before accessing the data on the GPU.
59+ * \code{.cpp}
60+ * amrex::Gpu::Buffer<int> buf;
61+ * buf.resize(n);
62+ * for (int i=0; i<n; ++i) {
63+ * buf[i] = i*i;
64+ * }
65+ * buf.copyToDeviceAsync();
66+ * int * ptr = buf.data();
67+ * // Use ptr inside ParallelFor
68+ * // optional:
69+ * // Change values of ptr inside ParallelFor
70+ * buf.copyToHost();
71+ * // Use buf.hostData() or buf[] on the CPU
72+ * \endcode
73+ */
74+ [[nodiscard]] T& operator [] (const std::size_t i) noexcept {
75+ return h_vect[i];
76+ }
77+
78+ [[nodiscard]] const T& operator [] (const std::size_t i) const noexcept {
79+ return h_vect[i];
80+ }
7081
71- [[nodiscard]] std::size_t size () const noexcept { return m_size ; }
82+ [[nodiscard]] std::size_t size () const noexcept { return h_vect. size () ; }
7283
73- void clear ()
74- {
84+ [[nodiscard]] bool empty () const noexcept { return h_vect.size () == 0 ; }
85+
86+ void resize (const std::size_t n) noexcept {
87+ h_vect.resize (n);
88+ if (useDVect ()) {
89+ d_vect.resize (n);
90+ }
91+ }
92+
93+ void clear () noexcept {
94+ h_vect.clear ();
95+ d_vect.clear ();
96+ }
97+
98+ void shrink_to_fit () noexcept {
99+ h_vect.shrink_to_fit ();
100+ d_vect.shrink_to_fit ();
101+ }
102+
103+ void reserve (const std::size_t n) noexcept {
104+ h_vect.reserve (n);
105+ if (useDVect ()) {
106+ d_vect.reserve (n);
107+ }
108+ }
109+
110+ /* *
111+ * \brief Adds an element to the back of the host (CPU) vector.
112+ * Does not update the device (GPU) vector, so copyToDeviceAsync()
113+ * needs to be called before accessing the data on the GPU.
114+ * \code{.cpp}
115+ * amrex::Gpu::Buffer<int> buf;
116+ * buf.reserve(n);
117+ * for (int i=0; i<n; ++i) {
118+ * buf.push_back(i*i);
119+ * }
120+ * buf.copyToDeviceAsync();
121+ * int * ptr = buf.data();
122+ * // Use ptr inside ParallelFor
123+ * // optional:
124+ * // Change values of ptr inside ParallelFor
125+ * buf.copyToHost();
126+ * // Use buf.hostData() or buf[] on the CPU
127+ * \endcode
128+ */
129+ void push_back (const T& value) noexcept {
130+ h_vect.push_back (value);
131+ }
132+
133+ T* copyToDeviceAsync () noexcept {
75134#ifdef AMREX_USE_GPU
76- if (d_data) { The_Arena ()->free (d_data); }
77- if (h_data) { The_Pinned_Arena ()->free (h_data); }
78- #else
79- std::free (h_data);
135+ if (useDVect () && !h_vect.empty ())
136+ {
137+ d_vect.resize (h_vect.size ());
138+ Gpu::htod_memcpy_async (d_vect.data (), h_vect.data (), h_vect.size ()*sizeof (T));
139+ return d_vect.data ();
140+ }
80141#endif
81- d_data = nullptr ;
82- h_data = nullptr ;
142+ return h_vect.data ();
83143 }
84144
85- T* copyToHost ()
86- {
145+ T* copyToHost () noexcept {
87146#ifdef AMREX_USE_GPU
88- if (d_data )
147+ if (useDVect () && !d_vect. empty () )
89148 {
90- Gpu::dtoh_memcpy_async (h_data, d_data, m_size*sizeof (T));
149+ h_vect.resize (d_vect.size ());
150+ Gpu::dtoh_memcpy_async (h_vect.data (), d_vect.data (), d_vect.size ()*sizeof (T));
91151 Gpu::streamSynchronize ();
92152 }
93153#endif
94- return h_data ;
154+ return h_vect. data () ;
95155 }
96156
97157private:
98- std::size_t m_size;
99- T* d_data = nullptr ;
100- T* h_data = nullptr ;
158+
159+ [[nodiscard]] bool useDVect () const noexcept {
160+ return Gpu::inLaunchRegion () /* && !use_unified_gpu_memory */ ;
161+ }
162+
163+ DeviceVector<T> d_vect;
164+ PinnedVector<T> h_vect;
101165};
102166
103167}
0 commit comments