Skip to content

Commit 225ded4

Browse files
committed
Use mini to load/store buffer memory in XPBD
1 parent 546fa4b commit 225ded4

File tree

3 files changed

+106
-107
lines changed

3 files changed

+106
-107
lines changed

source/pbat/gpu/xpbd/XpbdImplKernels.cuh

Lines changed: 57 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,25 @@ namespace gpu {
1414
namespace xpbd {
1515
namespace XpbdImplKernels {
1616

17+
namespace mini = pbat::math::linalg::mini;
18+
1719
struct FInitializeNeoHookeanConstraint
1820
{
1921
PBAT_DEVICE void operator()(GpuIndex c)
2022
{
21-
using namespace pbat::math::linalg::mini;
23+
using namespace mini;
2224
// Load vertex positions of element c
23-
GpuIndex const v[4] = {T[0][c], T[1][c], T[2][c], T[3][c]};
24-
SMatrix<GpuScalar, 3, 4> xc{};
25-
for (auto d = 0; d < 3; ++d)
26-
for (auto j = 0; j < 4; ++j)
27-
xc(d, j) = x[d][v[j]];
25+
SMatrix<GpuIndex, 4, 1> v = FromBuffers<4, 1>(T, c);
26+
SMatrix<GpuScalar, 3, 4> xc = FromBuffers(x, v.Transpose());
2827
// Compute shape matrix and its inverse
29-
SMatrix<GpuScalar, 3, 3> Ds = (xc.Slice<3, 3>(0, 1) - Repeat<1, 3>(xc.Col(0)));
30-
SMatrixView<GpuScalar, 3, 3> DmInvC(DmInv + 9 * c);
31-
DmInvC = Inverse(Ds);
28+
SMatrix<GpuScalar, 3, 3> Ds = (xc.Slice<3, 3>(0, 1) - Repeat<1, 3>(xc.Col(0)));
29+
SMatrixView<GpuScalar, 3, 3> DmInvC = FromFlatBuffer<3, 3>(DmInv, c);
30+
DmInvC = Inverse(Ds);
3231
// Compute constraint compliance
33-
GpuScalar const tetVolume = Determinant(Ds) / GpuScalar{6.};
34-
SMatrixView<GpuScalar, 2, 1> alphac{alpha + 2 * c};
35-
SMatrixView<GpuScalar, 2, 1> lamec{lame + 2 * c};
36-
alphac(0) = GpuScalar{1.} / (lamec(0) * tetVolume);
37-
alphac(1) = GpuScalar{1.} / (lamec(1) * tetVolume);
32+
GpuScalar const tetVolume = Determinant(Ds) / GpuScalar{6};
33+
SMatrixView<GpuScalar, 2, 1> alphac = FromFlatBuffer<2, 1>(alpha, c);
34+
SMatrixView<GpuScalar, 2, 1> lamec = FromFlatBuffer<2, 1>(lame, c);
35+
alphac = GpuScalar{1} / (lamec * tetVolume);
3836
// Compute rest stability
3937
gamma[c] = GpuScalar{1.} + lamec(0) / lamec(1);
4038
}
@@ -69,13 +67,13 @@ struct FStableNeoHookeanConstraint
6967
{
7068
PBAT_DEVICE void Project(
7169
GpuScalar C,
72-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 4> const& gradC,
73-
pbat::math::linalg::mini::SMatrix<GpuScalar, 4, 1> const& minvc,
70+
mini::SMatrix<GpuScalar, 3, 4> const& gradC,
71+
mini::SMatrix<GpuScalar, 4, 1> const& minvc,
7472
GpuScalar atilde,
7573
GpuScalar& lambdac,
76-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 4>& xc)
74+
mini::SMatrix<GpuScalar, 3, 4>& xc)
7775
{
78-
using namespace pbat::math::linalg::mini;
76+
using namespace mini;
7977
GpuScalar dlambda =
8078
-(C + atilde * lambdac) /
8179
(minvc(0) * SquaredNorm(gradC.Col(0)) + minvc(1) * SquaredNorm(gradC.Col(1)) +
@@ -89,13 +87,13 @@ struct FStableNeoHookeanConstraint
8987

9088
PBAT_DEVICE void ProjectHydrostatic(
9189
GpuIndex c,
92-
pbat::math::linalg::mini::SMatrix<GpuScalar, 4, 1> const& minvc,
90+
mini::SMatrix<GpuScalar, 4, 1> const& minvc,
9391
GpuScalar atilde,
9492
GpuScalar gammac,
9593
GpuScalar& lambdac,
96-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 4>& xc)
94+
mini::SMatrix<GpuScalar, 3, 4>& xc)
9795
{
98-
using namespace pbat::math::linalg::mini;
96+
using namespace mini;
9997
SMatrixView<GpuScalar, 3, 3> DmInvC(DmInv + 9 * c);
10098
SMatrix<GpuScalar, 3, 3> F = (xc.Slice<3, 3>(0, 1) - Repeat<1, 3>(xc.Col(0))) * DmInvC;
10199
GpuScalar C = Determinant(F) - gammac;
@@ -111,12 +109,12 @@ struct FStableNeoHookeanConstraint
111109

112110
PBAT_DEVICE void ProjectDeviatoric(
113111
GpuIndex c,
114-
pbat::math::linalg::mini::SMatrix<GpuScalar, 4, 1> const& minvc,
112+
mini::SMatrix<GpuScalar, 4, 1> const& minvc,
115113
GpuScalar atilde,
116114
GpuScalar& lambdac,
117-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 4>& xc)
115+
mini::SMatrix<GpuScalar, 3, 4>& xc)
118116
{
119-
using namespace pbat::math::linalg::mini;
117+
using namespace mini;
120118
SMatrixView<GpuScalar, 3, 3> DmInvC(DmInv + 9 * c);
121119
SMatrix<GpuScalar, 3, 3> F = (xc.Slice<3, 3>(0, 1) - Repeat<1, 3>(xc.Col(0))) * DmInvC;
122120
GpuScalar C = Norm(F);
@@ -128,34 +126,23 @@ struct FStableNeoHookeanConstraint
128126

129127
PBAT_DEVICE void operator()(GpuIndex c)
130128
{
131-
using namespace pbat::math::linalg::mini;
129+
using namespace mini;
132130

133131
// 1. Load constraint data in local memory
134-
GpuIndex const v[4] = {T[0][c], T[1][c], T[2][c], T[3][c]};
135-
SMatrix<GpuScalar, 3, 4> xc{};
136-
for (auto d = 0; d < 3; ++d)
137-
for (auto j = 0; j < 4; ++j)
138-
xc(d, j) = x[d][v[j]];
139-
SMatrix<GpuScalar, 4, 1> minvc{};
140-
for (auto j = 0; j < 4; ++j)
141-
minvc(j) = minv[v[j]];
142-
SMatrix<GpuScalar, 2, 1> lambdac{};
143-
lambdac(0) = lambda[2 * c];
144-
lambdac(1) = lambda[2 * c + 1];
145-
SMatrix<GpuScalar, 2, 1> atilde{};
146-
atilde(0) = alpha[2 * c] / dt2;
147-
atilde(1) = alpha[2 * c + 1] / dt2;
132+
SMatrix<GpuIndex, 4, 1> v = FromBuffers<4, 1>(T, c);
133+
SMatrix<GpuScalar, 3, 4> xc = FromBuffers(x, v.Transpose());
134+
SMatrix<GpuScalar, 4, 1> minvc = FromFlatBuffer(minv, v);
135+
SMatrix<GpuScalar, 2, 1> lambdac = FromFlatBuffer<2, 1>(lambda, c);
136+
SMatrix<GpuScalar, 2, 1> atilde = FromFlatBuffer<2, 1>(alpha, c);
137+
atilde /= dt2;
148138

149139
// 2. Project elastic constraints
150140
ProjectDeviatoric(c, minvc, atilde(0), lambdac(0), xc);
151141
ProjectHydrostatic(c, minvc, atilde(1), gamma[c], lambdac(1), xc);
152142

153143
// 3. Update global "Lagrange" multipliers and positions
154-
lambda[2 * c] = lambdac(0);
155-
lambda[2 * c + 1] = lambdac(1);
156-
for (auto d = 0; d < 3; ++d)
157-
for (auto j = 0; j < 4; ++j)
158-
x[d][v[j]] = xc(d, j);
144+
ToFlatBuffer(lambdac, lambda, c);
145+
ToBuffers(xc, v.Transpose(), x);
159146
}
160147

161148
std::array<GpuScalar*, 3> x;
@@ -169,19 +156,18 @@ struct FStableNeoHookeanConstraint
169156
GpuScalar dt2;
170157
};
171158

172-
PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1>
173-
PointOnPlane(auto const& X, auto const& P, auto const& n)
159+
PBAT_DEVICE mini::SMatrix<GpuScalar, 3, 1> PointOnPlane(auto const& X, auto const& P, auto const& n)
174160
{
175-
using namespace pbat::math::linalg::mini;
161+
using namespace mini;
176162
GpuScalar const t = (n.Transpose() * (X - P))(0, 0);
177163
SMatrix<GpuScalar, 3, 1> Xp = X - t * n;
178164
return Xp;
179165
}
180166

181-
PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1>
167+
PBAT_DEVICE mini::SMatrix<GpuScalar, 3, 1>
182168
TriangleBarycentricCoordinates(auto const& AP, auto const& AB, auto const& AC)
183169
{
184-
using namespace pbat::math::linalg::mini;
170+
using namespace mini;
185171
GpuScalar const d00 = Dot(AB, AB);
186172
GpuScalar const d01 = Dot(AB, AC);
187173
GpuScalar const d11 = Dot(AC, AC);
@@ -204,15 +190,15 @@ struct FVertexTriangleContactConstraint
204190

205191
PBAT_DEVICE bool ProjectVertexTriangle(
206192
GpuScalar minvv,
207-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1> const& minvf,
208-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1> const& xvt,
209-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 3> const& xft,
210-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 3> const& xf,
193+
mini::SMatrix<GpuScalar, 3, 1> const& minvf,
194+
mini::SMatrix<GpuScalar, 3, 1> const& xvt,
195+
mini::SMatrix<GpuScalar, 3, 3> const& xft,
196+
mini::SMatrix<GpuScalar, 3, 3> const& xf,
211197
GpuScalar atildec,
212198
GpuScalar& lambdac,
213-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1>& xv)
199+
mini::SMatrix<GpuScalar, 3, 1>& xv)
214200
{
215-
using namespace pbat::math::linalg::mini;
201+
using namespace mini;
216202
// Numerically zero inverse mass makes the Schur complement ill-conditioned/singular
217203
if (minvv < GpuScalar{1e-10})
218204
return false;
@@ -232,17 +218,17 @@ struct FVertexTriangleContactConstraint
232218
// If xv doesn't project inside triangle, then we don't generate a contact response
233219
// clang-format off
234220
bool const bIsVertexInsideTriangle =
235-
(b(0) >= GpuScalar{0.f}) and (b(0) <= GpuScalar{1.f}) and
236-
(b(1) >= GpuScalar{0.f}) and (b(1) <= GpuScalar{1.f}) and
237-
(b(2) >= GpuScalar{0.f}) and (b(2) <= GpuScalar{1.f});
221+
(b(0) >= GpuScalar{0}) and (b(0) <= GpuScalar{1}) and
222+
(b(1) >= GpuScalar{0}) and (b(1) <= GpuScalar{1}) and
223+
(b(2) >= GpuScalar{0}) and (b(2) <= GpuScalar{1});
238224
// clang-format on
239225
if (not bIsVertexInsideTriangle)
240226
return false;
241227

242228
// Project xv onto triangle's plane into xc
243229
GpuScalar const C = Dot(n, xv - xf.Col(0));
244230
// If xv is positively oriented w.r.t. triangles xf, there is no penetration
245-
if (C > GpuScalar{0.})
231+
if (C > GpuScalar{0})
246232
return false;
247233
// Prevent super violent projection for stability, i.e. if the collision constraint
248234
// violation is already too large, we give up.
@@ -271,58 +257,32 @@ struct FVertexTriangleContactConstraint
271257
return true;
272258
}
273259

274-
PBAT_DEVICE void SetParticlePosition(
275-
GpuIndex v,
276-
pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1> const& xv,
277-
std::array<GpuScalar*, 3> const& xx)
278-
{
279-
for (auto d = 0; d < 3; ++d)
280-
xx[d][v] = xv(d, 0);
281-
}
282-
283-
PBAT_DEVICE pbat::math::linalg::mini::SMatrix<GpuScalar, 3, 1>
284-
GetParticlePosition(GpuIndex v, std::array<GpuScalar*, 3> const& xx)
285-
{
286-
using namespace pbat::math::linalg::mini;
287-
SMatrix<GpuScalar, 3, 1> xv{};
288-
for (auto d = 0; d < 3; ++d)
289-
xv(d, 0) = xx[d][v];
290-
return xv;
291-
}
292-
293260
PBAT_DEVICE void operator()(GpuIndex c)
294261
{
295-
using namespace pbat::math::linalg::mini;
296-
GpuIndex vv = V[0][cpairs[c].first]; ///< Colliding vertex
297-
GpuIndex vf[3] = {
262+
using namespace mini;
263+
GpuIndex vv = V[0][cpairs[c].first]; ///< Colliding vertex
264+
SMatrix<GpuIndex, 3, 1> vf{
298265
F[0][cpairs[c].second],
299266
F[1][cpairs[c].second],
300267
F[2][cpairs[c].second]}; ///< Colliding triangle
301268

302-
SMatrix<GpuScalar, 3, 1> xv = GetParticlePosition(vv, x);
303-
SMatrix<GpuScalar, 3, 3> xf{};
304-
for (auto j = 0; j < 3; ++j)
305-
xf.Col(j) = GetParticlePosition(vf[j], x);
306-
307-
SMatrix<GpuScalar, 3, 1> xvt = GetParticlePosition(vv, xt);
308-
SMatrix<GpuScalar, 3, 3> xft{};
309-
for (auto j = 0; j < 3; ++j)
310-
xft.Col(j) = GetParticlePosition(vf[j], xt);
269+
SMatrix<GpuScalar, 3, 1> xvt = FromBuffers<3, 1>(xt, vv);
270+
SMatrix<GpuScalar, 3, 1> xv = FromBuffers<3, 1>(x, vv);
271+
SMatrix<GpuScalar, 3, 3> xft = FromBuffers(xt, vf.Transpose());
272+
SMatrix<GpuScalar, 3, 3> xf = FromBuffers(x, vf.Transpose());
311273

312-
GpuScalar const minvv = minv[vv];
313-
SMatrix<GpuScalar, 3, 1> minvf{};
314-
for (auto j = 0; j < 3; ++j)
315-
minvf(j, 0) = minv[vf[j]];
316-
GpuScalar atildec = alpha[c] / dt2;
317-
GpuScalar lambdac = lambda[c];
274+
GpuScalar const minvv = minv[vv];
275+
SMatrix<GpuScalar, 3, 1> minvf = FromFlatBuffer(minv, vf);
276+
GpuScalar atildec = alpha[c] / dt2;
277+
GpuScalar lambdac = lambda[c];
318278

319279
// 2. Project collision constraint
320280
if (not ProjectVertexTriangle(minvv, minvf, xvt, xft, xf, atildec, lambdac, xv))
321281
return;
322282

323283
// 3. Update global positions
324284
// lambda[c] = lambdac;
325-
SetParticlePosition(vv, xv, x);
285+
ToBuffers(xv, x, vv);
326286
}
327287

328288
std::array<GpuScalar*, 3> x; ///< Current positions

source/pbat/math/linalg/mini/Matrix.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ PBAT_HOST_DEVICE auto FromFlatBuffer(TScalar* buf, TIndexMatrix const& inds)
247247
static_assert(std::is_integral_v<IntegerType>, "inds must be matrix of indices");
248248
auto constexpr M = TIndexMatrix::kRows;
249249
auto constexpr N = TIndexMatrix::kCols;
250-
SMatrix<std::remove_const_t<TScalar>, M, N> A{};
250+
using ScalarType = std::remove_cvref_t<TScalar>;
251+
SMatrix<ScalarType, M, N> A{};
251252
using pbat::common::ForRange;
252253
ForRange<0, N>([&]<auto j>() { ForRange<0, M>([&]<auto i>() { A(i, j) = buf[inds(i, j)]; }); });
253254
return A;
@@ -257,9 +258,10 @@ template <CMatrix TMatrix, class IndexType>
257258
PBAT_HOST_DEVICE void
258259
ToFlatBuffer(TMatrix const& A, typename TMatrix::ScalarType* buf, IndexType bi)
259260
{
260-
auto constexpr M = TMatrix::kRows;
261-
auto constexpr N = TMatrix::kCols;
262-
FromFlatBuffer<M, N>(buf, bi) = A;
261+
using ScalarType = typename TMatrix::ScalarType;
262+
auto constexpr M = TMatrix::kRows;
263+
auto constexpr N = TMatrix::kCols;
264+
FromFlatBuffer<M, N, ScalarType>(buf, bi) = A;
263265
}
264266

265267
template <CMatrix TMatrix, CMatrix TIndexMatrix>
@@ -289,13 +291,12 @@ ToFlatBuffer(TMatrix const& A, TIndexMatrix const& inds, typename TMatrix::Scala
289291
}
290292

291293
template <int M, int N, class TScalar, class IndexType>
292-
PBAT_HOST_DEVICE auto
293-
FromBuffers([[maybe_unused]] std::array<TScalar*, M> buf, [[maybe_unused]] IndexType bi)
294+
PBAT_HOST_DEVICE auto FromBuffers(std::array<TScalar*, M> buf, IndexType bi)
294295
{
295296
using ScalarType = std::remove_const_t<TScalar>;
296297
SMatrix<ScalarType, M, N> A{};
297298
using pbat::common::ForRange;
298-
ForRange<0, M>([&]<auto i>() { A.Row(i) = FromFlatBuffer<1, N>(buf[i], bi); });
299+
ForRange<0, M>([&]<auto i>() { A.Row(i) = FromFlatBuffer<1, N, ScalarType>(buf[i], bi); });
299300
return A;
300301
}
301302

@@ -306,9 +307,11 @@ PBAT_HOST_DEVICE auto FromBuffers(std::array<TScalar*, K> buf, TIndexMatrix cons
306307
static_assert(std::is_integral_v<IntegerType>, "inds must be matrix of indices");
307308
auto constexpr M = TIndexMatrix::kRows;
308309
auto constexpr N = TIndexMatrix::kCols;
309-
SMatrix<std::remove_const_t<TScalar>, K * M, N> A{};
310+
using ScalarType = std::remove_cvref_t<TScalar>;
311+
SMatrix<ScalarType, K * M, N> A{};
310312
using pbat::common::ForRange;
311-
ForRange<0, K>([&]<auto k>() { A.Slice<M, N>(k * M, 0) = FromFlatBuffer(buf[k], inds); });
313+
ForRange<0, K>(
314+
[&]<auto k>() { A.Slice<M, N>(k * M, 0) = FromFlatBuffer<ScalarType>(buf[k], inds); });
312315
return A;
313316
}
314317

@@ -318,8 +321,9 @@ ToBuffers(TMatrix const& A, std::array<typename TMatrix::ScalarType*, M> buf, In
318321
{
319322
static_assert(M == TMatrix::kRows, "A must have same rows as number of buffers");
320323
auto constexpr N = TMatrix::kCols;
324+
using ScalarType = typename TMatrix::ScalarType;
321325
using pbat::common::ForRange;
322-
ForRange<0, M>([&]<auto i>() { FromFlatBuffer<1, N>(buf[i], bi) = A.Row(i); });
326+
ForRange<0, M>([&]<auto i>() { FromFlatBuffer<1, N, ScalarType>(buf[i], bi) = A.Row(i); });
323327
}
324328

325329
template <CMatrix TMatrix, CMatrix TIndexMatrix, int K>
@@ -335,6 +339,7 @@ PBAT_HOST_DEVICE void ToBuffers(
335339
static_assert(MA % MI == 0, "Rows of A must be multiple of rows of inds");
336340
static_assert(NA == NI, "A and inds must have same number of columns");
337341
static_assert(MA / MI == K, "A must have number of rows == #buffers*#rows of inds");
342+
using ScalarType = typename TMatrix::ScalarType;
338343
using pbat::common::ForRange;
339344
ForRange<0, K>([&]<auto k>() { ToFlatBuffer(A.Slice<MI, NI>(k * MI, 0), inds, buf[k]); });
340345
}

source/pbat/math/linalg/mini/UnaryOperations.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,32 @@ class Square
4343
NestedType const& A;
4444
};
4545

46+
template <CMatrix TLhsMatrix>
47+
class Reciprocal
48+
{
49+
public:
50+
using LhsNestedType = TLhsMatrix;
51+
using ScalarType = typename LhsNestedType::ScalarType;
52+
using SelfType = Reciprocal<LhsNestedType>;
53+
54+
static auto constexpr kRows = LhsNestedType::kRows;
55+
static auto constexpr kCols = LhsNestedType::kCols;
56+
static bool constexpr bRowMajor = LhsNestedType::bRowMajor;
57+
58+
PBAT_HOST_DEVICE Reciprocal(LhsNestedType const& A) : mA(A) {}
59+
60+
PBAT_HOST_DEVICE ScalarType operator()(auto i, auto j) const { return ScalarType(1) / mA(i, j); }
61+
62+
// Vector(ized) access
63+
PBAT_HOST_DEVICE ScalarType operator()(auto i) const { return (*this)(i % kRows, i / kRows); }
64+
PBAT_HOST_DEVICE ScalarType operator[](auto i) const { return (*this)(i); }
65+
66+
PBAT_MINI_READ_API(SelfType)
67+
68+
private:
69+
LhsNestedType const& mA;
70+
};
71+
4672
template <CMatrix TMatrix>
4773
class Absolute
4874
{
@@ -117,6 +143,14 @@ PBAT_HOST_DEVICE auto Max(TMatrix const& A)
117143
return maximum(std::make_integer_sequence<IntegerType, TMatrix::kRows * TMatrix::kCols>());
118144
}
119145

146+
template <class /*CMatrix*/ TMatrix>
147+
PBAT_HOST_DEVICE auto operator/(typename std::remove_cvref_t<TMatrix>::ScalarType k, TMatrix&& A)
148+
{
149+
using MatrixType = std::remove_cvref_t<TMatrix>;
150+
PBAT_MINI_CHECK_CMATRIX(MatrixType);
151+
return k * Reciprocal<MatrixType>(std::forward<TMatrix>(A));
152+
}
153+
120154
} // namespace mini
121155
} // namespace linalg
122156
} // namespace math

0 commit comments

Comments
 (0)