3
3
// clang-format on
4
4
5
5
#include " VbdImpl.cuh"
6
- #include " VbdImplKernels .cuh"
6
+ #include " Kernels .cuh"
7
7
#include " pbat/gpu/common/Cuda.cuh"
8
+ #include " pbat/math/linalg/mini/Mini.h"
9
+ #include " pbat/sim/vbd/Kernels.h"
8
10
9
11
#include < cuda/api.hpp>
10
12
// #include <thrust/async/copy.h>
@@ -25,7 +27,7 @@ VbdImpl::VbdImpl(
25
27
F(Fin),
26
28
T(Tin),
27
29
mPositionsAtT(Xin.cols()),
28
- mKineticEnergyMinimalPositions (Xin.cols()),
30
+ mInertialTargetPositions (Xin.cols()),
29
31
mChebyshevPositionsM2(Xin.cols()),
30
32
mChebyshevPositionsM1(Xin.cols()),
31
33
mVelocitiesAtT(Xin.cols()),
@@ -74,7 +76,7 @@ void VbdImpl::Step(GpuScalar dt, GpuIndex iterations, GpuIndex substeps, GpuScal
74
76
bdf.dt = sdt;
75
77
bdf.dt2 = sdt2;
76
78
bdf.m = mMass .Raw ();
77
- bdf.xtilde = mKineticEnergyMinimalPositions .Raw ();
79
+ bdf.xtilde = mInertialTargetPositions .Raw ();
78
80
bdf.xt = mPositionsAtT .Raw ();
79
81
bdf.x = X.x .Raw ();
80
82
bdf.T = T.inds .Raw ();
@@ -98,6 +100,7 @@ void VbdImpl::Step(GpuScalar dt, GpuIndex iterations, GpuIndex substeps, GpuScal
98
100
mStream .device ().make_current ();
99
101
for (auto s = 0 ; s < substeps; ++s)
100
102
{
103
+ using namespace pbat ::math::linalg::mini;
101
104
// Store previous positions
102
105
for (auto d = 0 ; d < X.x .Dimensions (); ++d)
103
106
{
@@ -113,40 +116,56 @@ void VbdImpl::Step(GpuScalar dt, GpuIndex iterations, GpuIndex substeps, GpuScal
113
116
thrust::device.on (mStream .handle ()),
114
117
thrust::make_counting_iterator<GpuIndex>(0 ),
115
118
thrust::make_counting_iterator<GpuIndex>(nVertices),
116
- kernels::FKineticEnergyMinimum{
117
- sdt,
118
- sdt2,
119
- X.x .Raw (),
120
- mVelocities .Raw (),
121
- mExternalAcceleration .Raw (),
122
- mKineticEnergyMinimalPositions .Raw ()});
119
// Per-vertex device lambda for the inertial-target pass. For index i it loads one
// entry from each of xt, vt and aext with FromBuffers<3, 1>, passes them together
// with the captured dt and dt2 to InertialTarget, and stores the result at index i
// of xtilde with ToBuffers.
// dt and dt2 are captured from sdt and sdt2, not from Step's dt argument.
// NOTE(review): the removed FKineticEnergyMinimum functor was given X.x.Raw() as its
// position buffer, whereas this lambda reads mPositionsAtT.Raw(). Presumably both
// hold the same values at this point because of the "Store previous positions" loop
// earlier in the substep -- confirm.
+ [xt = mPositionsAtT .Raw (),
120
+ vt = mVelocities .Raw (),
121
+ aext = mExternalAcceleration .Raw (),
122
+ xtilde = mInertialTargetPositions .Raw (),
123
+ dt = sdt,
124
+ dt2 = sdt2] PBAT_DEVICE (auto i) {
125
+ using pbat::sim::vbd::kernels::InertialTarget;
126
+ auto y = InertialTarget (
127
+ FromBuffers<3 , 1 >(xt, i),
128
+ FromBuffers<3 , 1 >(vt, i),
129
+ FromBuffers<3 , 1 >(aext, i),
130
+ dt,
131
+ dt2);
132
+ ToBuffers (y, xtilde, i);
133
+ });
123
134
// Initialize block coordinate descent's, i.e. BCD's, solution
124
135
e = thrust::async::for_each (
125
136
thrust::device.on (mStream .handle ()),
126
137
thrust::make_counting_iterator<GpuIndex>(0 ),
127
138
thrust::make_counting_iterator<GpuIndex>(nVertices),
128
- kernels::FAdaptiveInitialization{
129
- sdt,
130
- sdt2,
131
- mPositionsAtT .Raw (),
132
- mVelocitiesAtT .Raw (),
133
- mVelocities .Raw (),
134
- mExternalAcceleration .Raw (),
135
- X.x .Raw (),
136
- mInitializationStrategy });
139
// Per-vertex device lambda that seeds the block coordinate descent solution. For
// index i it loads entries from xt, vtm1 (captured from mVelocitiesAtT), vt and aext
// with FromBuffers<3, 1>, passes them with dt, dt2 and the captured initialization
// strategy to InitialPositionsForSolve, and stores the result at index i of x
// (captured from X.x) with ToBuffers.
// dt and dt2 are captured from sdt and sdt2, not from Step's dt argument.
+ [xt = mPositionsAtT .Raw (),
140
+ vtm1 = mVelocitiesAtT .Raw (),
141
+ vt = mVelocities .Raw (),
142
+ aext = mExternalAcceleration .Raw (),
143
+ x = X.x .Raw (),
144
+ dt = sdt,
145
+ dt2 = sdt2,
146
+ strategy = mInitializationStrategy ] PBAT_DEVICE (auto i) {
147
+ using pbat::sim::vbd::kernels::InitialPositionsForSolve;
148
+ auto x0 = InitialPositionsForSolve (
149
+ FromBuffers<3 , 1 >(xt, i),
150
+ FromBuffers<3 , 1 >(vtm1, i),
151
+ FromBuffers<3 , 1 >(vt, i),
152
+ FromBuffers<3 , 1 >(aext, i),
153
+ dt,
154
+ dt2,
155
+ strategy);
156
+ ToBuffers (x0, x, i);
157
+ });
137
158
// Initialize Chebyshev semi-iterative method
138
- kernels::FChebyshev fChebyshev {
139
- rho,
140
- mChebyshevPositionsM2 .Raw (),
141
- mChebyshevPositionsM1 .Raw (),
142
- X.x .Raw ()};
159
+ GpuScalar rho2 = rho * rho;
160
+ GpuScalar omega{};
143
161
auto kDynamicSharedMemoryCapacity = static_cast <cuda::memory::shared::size_t >(
144
162
mGpuThreadBlockSize * bdf.ExpectedSharedMemoryPerThreadInBytes ());
145
163
// Minimize Backward Euler, i.e. BDF1, objective
146
164
for (auto k = 0 ; k < iterations; ++k)
147
165
{
166
+ using pbat::sim::vbd::kernels::ChebyshevOmega;
148
167
if (bUseChebyshevAcceleration)
149
- fChebyshev . SetIteration (k );
168
+ omega = ChebyshevOmega (k, rho2, omega );
150
169
151
170
for (auto & partition : mPartitions )
152
171
{
@@ -171,7 +190,17 @@ void VbdImpl::Step(GpuScalar dt, GpuIndex iterations, GpuIndex substeps, GpuScal
171
190
thrust::device.on (mStream .handle ()),
172
191
thrust::make_counting_iterator<GpuIndex>(0 ),
173
192
thrust::make_counting_iterator<GpuIndex>(nVertices),
174
- fChebyshev );
193
// Per-vertex device lambda for the Chebyshev pass. It captures the iteration index k
// and the current omega by value, loads xkm2i, xkm1i and xki for index i from the
// mChebyshevPositionsM2, mChebyshevPositionsM1 and X.x buffers with
// FromBuffers<3, 1>, and passes them to ChebyshevUpdate.
// NOTE(review): unlike the other per-vertex lambdas in this function, nothing is
// written back with ToBuffers after ChebyshevUpdate. If FromBuffers returns copies
// rather than views into the buffers, the update is discarded -- confirm against the
// definitions of FromBuffers and ChebyshevUpdate.
+ [k = k,
194
+ omega = omega,
195
+ xkm2 = mChebyshevPositionsM2 .Raw (),
196
+ xkm1 = mChebyshevPositionsM1 .Raw (),
197
+ xk = X.x .Raw ()] PBAT_DEVICE (auto i) {
198
+ using pbat::sim::vbd::kernels::ChebyshevUpdate;
199
+ auto xkm2i = FromBuffers<3 , 1 >(xkm2, i);
200
+ auto xkm1i = FromBuffers<3 , 1 >(xkm1, i);
201
+ auto xki = FromBuffers<3 , 1 >(xk, i);
202
+ ChebyshevUpdate (k, omega, xkm2i, xkm1i, xki);
203
+ });
175
204
}
176
205
}
177
206
// Update velocities
@@ -187,7 +216,13 @@ void VbdImpl::Step(GpuScalar dt, GpuIndex iterations, GpuIndex substeps, GpuScal
187
216
thrust::device.on (mStream .handle ()),
188
217
thrust::make_counting_iterator<GpuIndex>(0 ),
189
218
thrust::make_counting_iterator<GpuIndex>(nVertices),
190
- kernels::FUpdateVelocity{sdt, mPositionsAtT .Raw (), X.x .Raw (), mVelocities .Raw ()});
219
// Per-vertex device lambda for the velocity update. For index i it loads entries from
// xt (mPositionsAtT) and x (X.x) with FromBuffers<3, 1>, passes them with dt to
// IntegrateVelocity, and stores the result at index i of v (mVelocities) with
// ToBuffers.
// dt must be captured from sdt, the step size used throughout this substep loop
// (bdf.dt and the dt = sdt captures of the earlier lambdas). The removed
// FUpdateVelocity functor was also constructed with sdt. Capturing Step's dt argument
// here would integrate velocities over the full step instead of one substep.
+ [xt = mPositionsAtT .Raw (), x = X.x .Raw (), v = mVelocities .Raw (), dt = sdt] PBAT_DEVICE (
220
+ auto i) {
221
+ using pbat::sim::vbd::kernels::IntegrateVelocity;
222
+ auto vtp1 =
223
+ IntegrateVelocity (FromBuffers<3 , 1 >(xt, i), FromBuffers<3 , 1 >(x, i), dt);
224
+ ToBuffers (vtp1, v, i);
225
+ });
191
226
}
192
227
mStream .synchronize ();
193
228
}
@@ -395,7 +430,7 @@ std::vector<common::Buffer<GpuIndex>> const& VbdImpl::GetPartitions() const
395
430
#include < span>
396
431
#include < vector>
397
432
398
- TEST_CASE (" [gpu][xpbd] Xpbd " )
433
+ TEST_CASE (" [gpu][vbd] Vbd " )
399
434
{
400
435
using pbat::GpuIndex;
401
436
using pbat::GpuIndexMatrixX;
0 commit comments