@@ -59,90 +59,107 @@ inline CUDA_HOST_DEVICE unsigned int GetLength(const char* code) {
5959
6060// / Tape type used for storing values in reverse-mode AD inside loops.
6161template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
62- bool is_multithread = false >
63- using tape = tape_impl<T, SBO_SIZE, SLAB_SIZE, is_multithread>;
62+ bool is_multithread = false , bool DiskOffload = false >
63+ using tape = tape_impl<T, SBO_SIZE, SLAB_SIZE, is_multithread, DiskOffload >;
6464
6565// / Add value to the end of the tape, return the same value.
6666template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
67- typename ... ArgsT>
68- CUDA_HOST_DEVICE T& push (tape<T, SBO_SIZE, SLAB_SIZE>& to, ArgsT... val) {
67+ bool DiskOffload = false , typename ... ArgsT>
68+ CUDA_HOST_DEVICE T&
69+ push (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithread=*/ false , DiskOffload>& to,
70+ ArgsT... val) {
6971 to.emplace_back (std::forward<ArgsT>(val)...);
7072 return to.back ();
7173}
7274
7375// / A specialization for C arrays
7476template <typename T, typename U, size_t N, std::size_t SBO_SIZE = 64 ,
75- std::size_t SLAB_SIZE = 1024 >
76- CUDA_HOST_DEVICE void push (tape<T[N], SBO_SIZE, SLAB_SIZE>& to, const U& val) {
77+ std::size_t SLAB_SIZE = 1024 , bool DiskOffload = false >
78+ CUDA_HOST_DEVICE void
79+ push (tape<T[N], SBO_SIZE, SLAB_SIZE, /* is_multithread=*/ false , DiskOffload>& to,
80+ const U& val) {
7781 to.emplace_back ();
7882 std::copy (std::begin (val), std::end (val), std::begin (to.back ()));
7983}
8084
8185 // / Remove the last value from the tape, return it.
82- template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 >
83- CUDA_HOST_DEVICE T pop (tape<T, SBO_SIZE, SLAB_SIZE>& to) {
84- T val = std::move (to.back ());
85- to.pop_back ();
86- return val;
87- }
86+ template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
87+ bool DiskOffload = false >
88+ CUDA_HOST_DEVICE T
89+ pop (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithread=*/ false , DiskOffload>& to) {
90+ T val = std::move (to.back ());
91+ to.pop_back ();
92+ return val;
93+ }
8894
8995 // / A specialization for C arrays
90- template <typename T, std::size_t N, std::size_t SBO_SIZE = 64 ,
91- std::size_t SLAB_SIZE = 1024 >
92- CUDA_HOST_DEVICE void pop (tape<T[N], SBO_SIZE, SLAB_SIZE>& to) {
93- to.pop_back ();
94- }
96+ template <typename T, std::size_t N, std::size_t SBO_SIZE = 64 ,
97+ std::size_t SLAB_SIZE = 1024 , bool DiskOffload = false >
98+ CUDA_HOST_DEVICE void pop (tape<T[N], SBO_SIZE, SLAB_SIZE,
99+ /* is_multithread=*/ false , DiskOffload>& to) {
100+ to.pop_back ();
101+ }
95102
96103 // / Access return the last value in the tape.
97- template <typename T> CUDA_HOST_DEVICE T& back (tape<T>& of) {
98- return of.back ();
99- }
104+ template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
105+ bool DiskOffload = false >
106+ CUDA_HOST_DEVICE T&
107+ back (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithread=*/ false , DiskOffload>& of) {
108+ return of.back ();
109+ }
100110
101111 // / Thread safe tape access functions with mutex locking mechanism
112+ // / Thread safe tape access functions with mutex locking mechanism
102113#ifndef __CUDACC__
103- // / Add value to the end of the tape, return the same value.
104- template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
105- typename ... ArgsT>
106- T push (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true >& to,
107- ArgsT&&... val) {
108- std::lock_guard<std::mutex> lock (to.mutex ());
109- to.emplace_back (std::forward<ArgsT>(val)...);
110- return to.back ();
111- }
114+ // / Add value to the end of the tape, return the same value.
115+ template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
116+ bool DiskOffload = false , typename ... ArgsT>
117+ T push (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true , DiskOffload >& to,
118+ ArgsT&&... val) {
119+ std::lock_guard<std::mutex> lock (to.mutex ());
120+ to.emplace_back (std::forward<ArgsT>(val)...);
121+ return to.back ();
122+ }
112123
113124 // / A specialization for C arrays
114- template <typename T, typename U, size_t N, std::size_t SBO_SIZE = 64 ,
115- std::size_t SLAB_SIZE = 1024 >
116- void push (tape<T[N], SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true >& to,
117- const U& val) {
118- std::lock_guard<std::mutex> lock (to.mutex ());
119- to.emplace_back ();
120- std::copy (std::begin (val), std::end (val), std::begin (to.back ()));
121- }
125+ template <typename T, typename U, size_t N, std::size_t SBO_SIZE = 64 ,
126+ std::size_t SLAB_SIZE = 1024 , bool DiskOffload = false >
127+ void push (
128+ tape<T[N], SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true , DiskOffload>& to,
129+ const U& val) {
130+ std::lock_guard<std::mutex> lock (to.mutex ());
131+ to.emplace_back ();
132+ std::copy (std::begin (val), std::end (val), std::begin (to.back ()));
133+ }
122134
123135 // / Remove the last value from the tape, return it.
124- template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 >
125- T pop (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true >& to) {
126- std::lock_guard<std::mutex> lock (to.mutex ());
127- T val = std::move (to.back ());
128- to.pop_back ();
129- return val;
130- }
136+ template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
137+ bool DiskOffload = false >
138+ T pop (
139+ tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true , DiskOffload>& to) {
140+ std::lock_guard<std::mutex> lock (to.mutex ());
141+ T val = std::move (to.back ());
142+ to.pop_back ();
143+ return val;
144+ }
131145
132146 // / A specialization for C arrays
133- template <typename T, std::size_t N, std::size_t SBO_SIZE = 64 ,
134- std::size_t SLAB_SIZE = 1024 >
135- void pop (tape<T[N], SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true >& to) {
136- std::lock_guard<std::mutex> lock (to.mutex ());
137- to.pop_back ();
138- }
147+ template <typename T, std::size_t N, std::size_t SBO_SIZE = 64 ,
148+ std::size_t SLAB_SIZE = 1024 , bool DiskOffload = false >
149+ void pop (tape<T[N], SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true ,
150+ DiskOffload>& to) {
151+ std::lock_guard<std::mutex> lock (to.mutex ());
152+ to.pop_back ();
153+ }
139154
140155 // / Access return the last value in the tape.
141- template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 >
142- T& back (tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true >& of) {
143- std::lock_guard<std::mutex> lock (of.mutex ());
144- return of.back ();
145- }
156+ template <typename T, std::size_t SBO_SIZE = 64 , std::size_t SLAB_SIZE = 1024 ,
157+ bool DiskOffload = false >
158+ T& back (
159+ tape<T, SBO_SIZE, SLAB_SIZE, /* is_multithreaded=*/ true , DiskOffload>& of) {
160+ std::lock_guard<std::mutex> lock (of.mutex ());
161+ return of.back ();
162+ }
146163#endif
147164
148165 // / The purpose of this function is to initialize adjoints
0 commit comments