2
2
#include " pbat/gpu/DisableWarnings.h"
3
3
// clang-format on
4
4
5
- #include " pbat/HostDevice.h"
6
5
#include " VbdImplKernels.cuh"
6
+ #include " pbat/HostDevice.h"
7
+ #include " pbat/physics/StableNeoHookeanEnergy.h"
7
8
8
9
namespace pbat {
9
10
namespace gpu {
@@ -17,6 +18,8 @@ __global__ void MinimizeBackwardEuler(BackwardEulerMinimization BDF)
17
18
auto tid = threadIdx .x ;
18
19
auto bid = blockIdx .x ;
19
20
auto nThreadsPerBlock = blockDim .x ;
21
+ using namespace mini ;
22
+
20
23
// Vertex index
21
24
GpuIndex i = BDF.partition [bid];
22
25
// Get vertex-tet adjacency information
@@ -39,12 +42,14 @@ __global__ void MinimizeBackwardEuler(BackwardEulerMinimization BDF)
39
42
return ;
40
43
41
44
// 2. Accumulate results into vertex hessian and gradient
42
- using namespace pbat ::math::linalg::mini;
43
- SMatrix<GpuScalar, 3 > xti = BDF.ToLocal (i, BDF.xt );
44
- SMatrix<GpuScalar, 3 > xitilde = BDF.ToLocal (i, BDF.xtilde );
45
- SMatrix<GpuScalar, 3 > xi = BDF.ToLocal (i, BDF.x );
46
- SMatrix<GpuScalar, 3 , 3 > Hi = Zeros<GpuScalar, 3 , 3 >{};
47
- SMatrix<GpuScalar, 3 , 1 > gi = Zeros<GpuScalar, 3 , 1 >{};
45
+ SVector<GpuScalar, 3 > xti = FromBuffers<3 , 1 >(BDF.xt , i);
46
+ SVector<GpuScalar, 3 > xitilde = FromBuffers<3 , 1 >(BDF.xtilde , i);
47
+ SVector<GpuScalar, 3 > xi{
48
+ BDF.x [0 ][i],
49
+ BDF.x [1 ][i],
50
+ BDF.x [2 ][i]} /* = FromBuffers<3, 1>(BDF.x, i)*/ ;
51
+ SMatrix<GpuScalar, 3 , 3 > Hi = Zeros<GpuScalar, 3 , 3 >{};
52
+ SVector<GpuScalar, 3 > gi = Zeros<GpuScalar, 3 , 1 >{};
48
53
// Add elastic energy derivatives
49
54
auto const nActiveThreads = min (nAdjacentElements, nThreadsPerBlock);
50
55
for (auto j = 0 ; j < nActiveThreads; ++j)
@@ -62,216 +67,40 @@ __global__ void MinimizeBackwardEuler(BackwardEulerMinimization BDF)
62
67
Hi *= GpuScalar{1 } + D;
63
68
// Add inertial energy derivatives
64
69
GpuScalar const K = BDF.m [i] / BDF.dt2 ;
65
- Hi (0 , 0 ) += K;
66
- Hi (1 , 1 ) += K;
67
- Hi (2 , 2 ) += K;
70
+ Diag (Hi) += K;
68
71
gi += K * (xi - xitilde);
69
72
70
73
// 3. Newton step
71
74
if (abs (Determinant (Hi)) <= BDF.detHZero ) // Skip nearly rank-deficient hessian
72
75
return ;
76
+
73
77
xi = xi - (Inverse (Hi) * gi);
74
78
75
79
// 4. Commit vertex descent step
76
- BDF. ToGlobal (i, xi, BDF.x );
80
+ ToBuffers ( xi, BDF.x , i );
77
81
};
78
82
79
- PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 4 , 3 >
80
- BackwardEulerMinimization::BasisFunctionGradients (GpuIndex e) const
81
- {
82
- using namespace pbat ::math::linalg::mini;
83
- SMatrix<GpuScalar, 4 , 3 > GPlocal = SMatrixView<GpuScalar, 4 , 3 >(GP + e * 12 );
84
- return GPlocal;
85
- }
86
-
87
- PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 3 , 4 >
88
- BackwardEulerMinimization::ElementVertexPositions (GpuIndex e) const
89
- {
90
- using namespace pbat ::math::linalg::mini;
91
- SMatrix<GpuScalar, 3 , 4 > xe;
92
- for (auto i = 0 ; i < 4 ; ++i)
93
- {
94
- GpuIndex vi = T[i][e];
95
- xe.Col (i) = ToLocal (vi, x);
96
- }
97
- return xe;
98
- }
99
-
100
- PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 9 , 10 >
101
- BackwardEulerMinimization::StableNeoHookeanDerivativesWrtF (
102
- GpuIndex e,
103
- pbat::math::linalg::mini::SMatrix<GpuScalar, 3 , 3 > const & Fe,
104
- GpuScalar mu,
105
- GpuScalar lambda) const
106
- {
107
- using namespace pbat ::math::linalg::mini;
108
- SMatrix<GpuScalar, 9 , 10 > HGe;
109
- auto He = HGe.Slice <9 , 9 >(0 , 0 );
110
- auto ge = HGe.Col (9 );
111
-
112
- // Auto-generated from pbat/physics/StableNeoHookeanEnergy.h
113
- GpuScalar const a0 = Fe (4 ) * Fe (8 );
114
- GpuScalar const a1 = Fe (5 ) * Fe (7 );
115
- GpuScalar const a2 = 2 * a0 - 2 * a1;
116
- GpuScalar const a3 = Fe (3 ) * Fe (8 );
117
- GpuScalar const a4 = Fe (4 ) * Fe (6 );
118
- GpuScalar const a5 = lambda * (-a1 * Fe (0 ) - a3 * Fe (1 ) - a4 * Fe (2 ) + Fe (0 ) * Fe (4 ) * Fe (8 ) +
119
- Fe (1 ) * Fe (5 ) * Fe (6 ) + Fe (2 ) * Fe (3 ) * Fe (7 ) - 1 - mu / lambda);
120
- GpuScalar const a6 = (1.0 / 2.0 ) * a5;
121
- GpuScalar const a7 = -2 * a3 + 2 * Fe (5 ) * Fe (6 );
122
- GpuScalar const a8 = Fe (3 ) * Fe (7 );
123
- GpuScalar const a9 = -2 * a4 + 2 * a8;
124
- GpuScalar const a10 = Fe (1 ) * Fe (8 );
125
- GpuScalar const a11 = -2 * a10 + 2 * Fe (2 ) * Fe (7 );
126
- GpuScalar const a12 = Fe (0 ) * Fe (8 );
127
- GpuScalar const a13 = Fe (2 ) * Fe (6 );
128
- GpuScalar const a14 = 2 * a12 - 2 * a13;
129
- GpuScalar const a15 = Fe (0 ) * Fe (7 );
130
- GpuScalar const a16 = -2 * a15 + 2 * Fe (1 ) * Fe (6 );
131
- GpuScalar const a17 = Fe (1 ) * Fe (5 );
132
- GpuScalar const a18 = Fe (2 ) * Fe (4 );
133
- GpuScalar const a19 = 2 * a17 - 2 * a18;
134
- GpuScalar const a20 = Fe (0 ) * Fe (5 );
135
- GpuScalar const a21 = -2 * a20 + 2 * Fe (2 ) * Fe (3 );
136
- GpuScalar const a22 = Fe (0 ) * Fe (4 );
137
- GpuScalar const a23 = Fe (1 ) * Fe (3 );
138
- GpuScalar const a24 = 2 * a22 - 2 * a23;
139
- GpuScalar const a25 = (1.0 / 2.0 ) * lambda;
140
- GpuScalar const a26 = a25 * (a0 - a1);
141
- GpuScalar const a27 = a5 * Fe (8 );
142
- GpuScalar const a28 = a5 * Fe (7 );
143
- GpuScalar const a29 = -a28;
144
- GpuScalar const a30 = a5 * Fe (5 );
145
- GpuScalar const a31 = -a30;
146
- GpuScalar const a32 = a5 * Fe (4 );
147
- GpuScalar const a33 = a25 * (-a3 + Fe (5 ) * Fe (6 ));
148
- GpuScalar const a34 = -a27;
149
- GpuScalar const a35 = a5 * Fe (6 );
150
- GpuScalar const a36 = a5 * Fe (3 );
151
- GpuScalar const a37 = -a36;
152
- GpuScalar const a38 = a25 * (-a4 + a8);
153
- GpuScalar const a39 = -a35;
154
- GpuScalar const a40 = -a32;
155
- GpuScalar const a41 = a25 * (-a10 + Fe (2 ) * Fe (7 ));
156
- GpuScalar const a42 = a5 * Fe (2 );
157
- GpuScalar const a43 = a5 * Fe (1 );
158
- GpuScalar const a44 = -a43;
159
- GpuScalar const a45 = a25 * (a12 - a13);
160
- GpuScalar const a46 = -a42;
161
- GpuScalar const a47 = a5 * Fe (0 );
162
- GpuScalar const a48 = a25 * (-a15 + Fe (1 ) * Fe (6 ));
163
- GpuScalar const a49 = -a47;
164
- GpuScalar const a50 = a25 * (a17 - a18);
165
- GpuScalar const a51 = a25 * (-a20 + Fe (2 ) * Fe (3 ));
166
- GpuScalar const a52 = a25 * (a22 - a23);
167
- ge (0 ) = a2 * a6 + mu * Fe (0 );
168
- ge (1 ) = a6 * a7 + mu * Fe (1 );
169
- ge (2 ) = a6 * a9 + mu * Fe (2 );
170
- ge (3 ) = a11 * a6 + mu * Fe (3 );
171
- ge (4 ) = a14 * a6 + mu * Fe (4 );
172
- ge (5 ) = a16 * a6 + mu * Fe (5 );
173
- ge (6 ) = a19 * a6 + mu * Fe (6 );
174
- ge (7 ) = a21 * a6 + mu * Fe (7 );
175
- ge (8 ) = a24 * a6 + mu * Fe (8 );
176
- He (0 ) = a2 * a26 + mu;
177
- He (1 ) = a26 * a7;
178
- He (2 ) = a26 * a9;
179
- He (3 ) = a11 * a26;
180
- He (4 ) = a14 * a26 + a27;
181
- He (5 ) = a16 * a26 + a29;
182
- He (6 ) = a19 * a26;
183
- He (7 ) = a21 * a26 + a31;
184
- He (8 ) = a24 * a26 + a32;
185
- He (9 ) = a2 * a33;
186
- He (10 ) = a33 * a7 + mu;
187
- He (11 ) = a33 * a9;
188
- He (12 ) = a11 * a33 + a34;
189
- He (13 ) = a14 * a33;
190
- He (14 ) = a16 * a33 + a35;
191
- He (15 ) = a19 * a33 + a30;
192
- He (16 ) = a21 * a33;
193
- He (17 ) = a24 * a33 + a37;
194
- He (18 ) = a2 * a38;
195
- He (19 ) = a38 * a7;
196
- He (20 ) = a38 * a9 + mu;
197
- He (21 ) = a11 * a38 + a28;
198
- He (22 ) = a14 * a38 + a39;
199
- He (23 ) = a16 * a38;
200
- He (24 ) = a19 * a38 + a40;
201
- He (25 ) = a21 * a38 + a36;
202
- He (26 ) = a24 * a38;
203
- He (27 ) = a2 * a41;
204
- He (28 ) = a34 + a41 * a7;
205
- He (29 ) = a28 + a41 * a9;
206
- He (30 ) = a11 * a41 + mu;
207
- He (31 ) = a14 * a41;
208
- He (32 ) = a16 * a41;
209
- He (33 ) = a19 * a41;
210
- He (34 ) = a21 * a41 + a42;
211
- He (35 ) = a24 * a41 + a44;
212
- He (36 ) = a2 * a45 + a27;
213
- He (37 ) = a45 * a7;
214
- He (38 ) = a39 + a45 * a9;
215
- He (39 ) = a11 * a45;
216
- He (40 ) = a14 * a45 + mu;
217
- He (41 ) = a16 * a45;
218
- He (42 ) = a19 * a45 + a46;
219
- He (43 ) = a21 * a45;
220
- He (44 ) = a24 * a45 + a47;
221
- He (45 ) = a2 * a48 + a29;
222
- He (46 ) = a35 + a48 * a7;
223
- He (47 ) = a48 * a9;
224
- He (48 ) = a11 * a48;
225
- He (49 ) = a14 * a48;
226
- He (50 ) = a16 * a48 + mu;
227
- He (51 ) = a19 * a48 + a43;
228
- He (52 ) = a21 * a48 + a49;
229
- He (53 ) = a24 * a48;
230
- He (54 ) = a2 * a50;
231
- He (55 ) = a30 + a50 * a7;
232
- He (56 ) = a40 + a50 * a9;
233
- He (57 ) = a11 * a50;
234
- He (58 ) = a14 * a50 + a46;
235
- He (59 ) = a16 * a50 + a43;
236
- He (60 ) = a19 * a50 + mu;
237
- He (61 ) = a21 * a50;
238
- He (62 ) = a24 * a50;
239
- He (63 ) = a2 * a51 + a31;
240
- He (64 ) = a51 * a7;
241
- He (65 ) = a36 + a51 * a9;
242
- He (66 ) = a11 * a51 + a42;
243
- He (67 ) = a14 * a51;
244
- He (68 ) = a16 * a51 + a49;
245
- He (69 ) = a19 * a51;
246
- He (70 ) = a21 * a51 + mu;
247
- He (71 ) = a24 * a51;
248
- He (72 ) = a2 * a52 + a32;
249
- He (73 ) = a37 + a52 * a7;
250
- He (74 ) = a52 * a9;
251
- He (75 ) = a11 * a52 + a44;
252
- He (76 ) = a14 * a52 + a47;
253
- He (77 ) = a16 * a52;
254
- He (78 ) = a19 * a52;
255
- He (79 ) = a21 * a52;
256
- He (80 ) = a24 * a52 + mu;
257
- return HGe;
258
- }
259
-
260
83
PBAT_DEVICE void BackwardEulerMinimization::ComputeStableNeoHookeanDerivatives (
261
84
GpuIndex e,
262
85
GpuIndex ilocal,
263
86
GpuScalar* Hge) const
264
87
{
265
- using namespace pbat ::math::linalg:: mini;
88
+ using namespace mini ;
266
89
GpuScalar wge = wg[e];
267
- SMatrix<GpuScalar, 2 , 1 > lamee = SMatrixView<GpuScalar, 2 , 1 >{ lame + 2 * e} ;
90
+ SMatrix<GpuScalar, 2 , 1 > lamee = FromFlatBuffer< 2 , 1 >( lame, e) ;
268
91
// Compute (d^k Psi / dF^k)
269
- SMatrix<GpuScalar, 3 , 4 > xe = ElementVertexPositions (e);
270
- SMatrix<GpuScalar, 4 , 3 > GPe = BasisFunctionGradients (e);
271
- SMatrix<GpuScalar, 3 , 3 > Fe = xe * GPe;
272
- SMatrix<GpuScalar, 9 , 10 > HGF = StableNeoHookeanDerivativesWrtF (e, Fe, lamee (0 ), lamee (1 ));
273
- auto HF = HGF.Slice <9 , 9 >(0 , 0 );
274
- auto gF = HGF.Col (9 );
92
+ SMatrix<GpuIndex, 4 , 1 > v = FromBuffers<4 , 1 >(T, e);
93
+ SMatrix<GpuScalar, 3 , 4 > xe = FromBuffers (x, v.Transpose ());
94
+ SMatrix<GpuScalar, 4 , 3 > GPe = FromFlatBuffer<4 , 3 >(GP, e);
95
+ SMatrix<GpuScalar, 3 , 3 > Fe = xe * GPe;
96
+ SVector<GpuScalar, 9 > gF ;
97
+ SMatrix<GpuScalar, 9 , 9 > HF;
98
+ physics::StableNeoHookeanEnergy<3 > Psi{};
99
+ // NOTE:
100
+ // For some reason, nvcc doesn't like structured bindings, so I can't write
101
+ // auto [gF, HF] = Psi.gradAndHessian(Fe, lamee(0), lamee(1));
102
+ // or else I get nan values?? Weird.
103
+ std::tie (gF , HF) = Psi.gradAndHessian (Fe, lamee (0 ), lamee (1 ));
275
104
// Write vertex-specific derivatives into output memory HGe
276
105
SMatrixView<GpuScalar, 3 , 4 > HGei (Hge);
277
106
auto Hi = HGei.Slice <3 , 3 >(0 , 0 );
0 commit comments