Skip to content

Commit 2ce0f56

Browse files
committed
simplify loop
1 parent 0b4d8ae commit 2ce0f56

File tree

1 file changed

+51
-32
lines changed

1 file changed

+51
-32
lines changed

backend/src/cosserat_equations.h

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,8 @@ inline void compute_internal_forces(BlockType& block) {
217217
}
218218
}
219219

220-
221220
// Reset ghost values for cosserat_internal_stress (OnElement)
222-
block.reset_ghost_for_variable<system::cosserat_rod::ScratchVectorA>();
221+
block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorA>();
223222

224223
// Compute internal_forces = two_point_difference_kernel(cosserat_internal_stress)
225224
// internal_forces is OnNode (3, n_nodes) where n_nodes = n_elems + 1
@@ -447,13 +446,13 @@ inline void compute_internal_torques(BlockType& block) {
447446

448447
// Scratch buffers
449448
auto&& voronoi_dilatation_inv_cube_cached = block.template get<system::cosserat_rod::ScratchScalarC>();
450-
auto&& bend_twist_couple_2D = block.template get<system::cosserat_rod::ScratchVectorA>();
451-
auto&& scratch_vec_a_voronoi = block.template get<system::cosserat_rod::ScratchVectorB>();
449+
auto&& bend_twist_couple_2D = block.template get<system::cosserat_rod::InternalTorques>();
450+
auto&& J_omega_upon_e = block.template get<system::cosserat_rod::ScratchVectorA>();
451+
auto&& scratch_vec_b_voronoi = block.template get<system::cosserat_rod::ScratchVectorB>();
452452
auto&& bend_twist_couple_3D = block.template get<system::cosserat_rod::ScratchVectorC>();
453453
auto&& shear_stretch_couple = block.template get<system::cosserat_rod::ScratchVectorD>();
454454
auto&& lagrangian_transport = block.template get<system::cosserat_rod::ScratchVectorE>();
455455
auto&& unsteady_dilatation = block.template get<system::cosserat_rod::ScratchVectorF>();
456-
auto&& director_tangents = block.template get<system::cosserat_rod::ScratchVectorA>();
457456

458457
// Compute voronoi_dilatation_inv_cube_cached = 1.0 / voronoi_dilatation^3
459458
// MatrixType voronoi_dilatation_inv_cube_cached(1, n_voronoi);
@@ -466,7 +465,7 @@ inline void compute_internal_torques(BlockType& block) {
466465
// Compute bend_twist_couple_2D = difference_kernel(internal_couple * voronoi_dilatation_inv_cube_cached, ghost_voronoi_idx)
467466
// First compute the product
468467
// MatrixType internal_couple_scaled(3, n_voronoi);
469-
auto internal_couple_scaled = scratch_vec_a_voronoi;
468+
auto internal_couple_scaled = scratch_vec_b_voronoi;
470469
for (IndexType i = 0; i < 3; ++i) {
471470
#pragma omp parallel for simd schedule(static)
472471
for (IndexType k = 0; k < n_voronoi; ++k) {
@@ -476,7 +475,7 @@ inline void compute_internal_torques(BlockType& block) {
476475

477476
// Reset ghost values for internal_couple_scaled (OnVoronoi)
478477
// reset_ghost :: internal_couple_scaled
479-
block.reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
478+
block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
480479

481480
// Apply difference_kernel (two_point_difference_kernel)
482481
// MatrixType bend_twist_couple_2D(3, n_nodes);
@@ -485,12 +484,12 @@ inline void compute_internal_torques(BlockType& block) {
485484
// Compute bend_twist_couple_3D = quadrature_kernel((kappa x internal_couple) * rest_voronoi_lengths * voronoi_dilatation_inv_cube_cached, ghost_voronoi_idx)
486485
// First compute kappa x internal_couple
487486
// MatrixType kappa_cross_internal_couple(3, n_voronoi);
488-
auto kappa_cross_internal_couple = scratch_vec_a_voronoi;
487+
auto kappa_cross_internal_couple = scratch_vec_b_voronoi;
489488
batch_cross(kappa_cross_internal_couple, kappa, internal_couple);
490489

491490
// Multiply by rest_voronoi_lengths and voronoi_dilatation_inv_cube_cached
492491
// MatrixType bend_twist_couple_3D_input(3, n_voronoi);
493-
auto bend_twist_couple_3D_input = scratch_vec_a_voronoi;
492+
auto bend_twist_couple_3D_input = scratch_vec_b_voronoi;
494493
for (IndexType i = 0; i < 3; ++i) {
495494
#pragma omp parallel for simd schedule(static)
496495
for (IndexType k = 0; k < n_voronoi; ++k) {
@@ -502,42 +501,62 @@ inline void compute_internal_torques(BlockType& block) {
502501

503502
// Reset ghost values
504503
// reset_ghost:: bend_twist_couple_3D_input;
505-
block.reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
504+
block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
506505

507506
// Apply quadrature_kernel (trapezoidal)
508507
// MatrixType bend_twist_couple_3D(3, n_nodes);
509508
quadrature_kernel(bend_twist_couple_3D, bend_twist_couple_3D_input);
510509

511510
// Compute shear_stretch_couple = (Q^T * tangents) x internal_stress * rest_lengths
512511
// First compute Q^T * tangents (same as director^T @ tangents)
512+
// { // DEPRECATED BLOCK: REPLACED WITH BELOW OPS
513513
// MatrixType director_tangents(3, n_elems);
514-
for (IndexType i = 0; i < 3; ++i) {
515-
#pragma omp parallel for simd schedule(static)
516-
for (IndexType k = 0; k < n_elems; ++k) {
517-
double sum = 0.0;
518-
for (IndexType j = 0; j < 3; ++j) {
519-
IndexType director_idx = j * 3 + i;
520-
sum += director(director_idx, k) * tangents(j, k);
521-
}
522-
director_tangents(i, k) = sum;
523-
}
524-
}
514+
// for (IndexType i = 0; i < 3; ++i) {
515+
// #pragma omp parallel for simd schedule(static)
516+
// for (IndexType k = 0; k < n_elems; ++k) {
517+
// double sum = 0.0;
518+
// for (IndexType j = 0; j < 3; ++j) {
519+
// IndexType director_idx = j * 3 + i;
520+
// sum += director(director_idx, k) * tangents(j, k);
521+
// }
522+
// director_tangents(i, k) = sum;
523+
// }
524+
// }
525+
526+
// // Compute cross product: (Q^T * tangents) x internal_stress
527+
// // MatrixType shear_stretch_couple(3, n_elems);
528+
// batch_cross(shear_stretch_couple, director_tangents, internal_stress);
529+
530+
// // Multiply by rest_lengths
531+
// for (IndexType i = 0; i < 3; ++i) {
532+
// #pragma omp parallel for simd schedule(static)
533+
// for (IndexType k = 0; k < n_elems; ++k) {
534+
// shear_stretch_couple(i, k) *= rest_lengths(0, k);
535+
// }
536+
// }
537+
// }
525538

526-
// Compute cross product: (Q^T * tangents) x internal_stress
527-
// MatrixType shear_stretch_couple(3, n_elems);
528-
batch_cross(shear_stretch_couple, director_tangents, internal_stress);
539+
#pragma omp parallel for simd schedule(static)
540+
for (IndexType k = 0; k < n_elems; ++k) {
541+
// Compute Q^T * tangents
529542

530-
// Multiply by rest_lengths
531-
for (IndexType i = 0; i < 3; ++i) {
532-
#pragma omp parallel for simd schedule(static)
533-
for (IndexType k = 0; k < n_elems; ++k) {
534-
shear_stretch_couple(i, k) *= rest_lengths(0, k);
535-
}
543+
double a0 = director(0, k) * tangents(0, k) + director(3, k) * tangents(1, k) + director(6, k) * tangents(2, k);
544+
double a1 = director(1, k) * tangents(0, k) + director(4, k) * tangents(1, k) + director(7, k) * tangents(2, k);
545+
double a2 = director(2, k) * tangents(0, k) + director(5, k) * tangents(1, k) + director(8, k) * tangents(2, k);
546+
547+
// internal_stress alias
548+
double i0 = internal_stress(0, k);
549+
double i1 = internal_stress(1, k);
550+
double i2 = internal_stress(2, k);
551+
552+
// cross-product (Q*t x n) * l_hat
553+
shear_stretch_couple(0, k) += (a1 * i2 - a2 * i1) * rest_lengths(0, k);
554+
shear_stretch_couple(1, k) += (a2 * i0 - a0 * i2) * rest_lengths(0, k);
555+
shear_stretch_couple(2, k) += (a0 * i1 - a1 * i0) * rest_lengths(0, k);
536556
}
537557

538558
// Compute J_omega_upon_e = batch_matvec(mass_second_moment_of_inertia, omega) / dilatation
539559
// MatrixType J_omega_upon_e(3, n_elems);
540-
auto J_omega_upon_e = director_tangents;
541560
#pragma omp parallel for simd schedule(static)
542561
for (IndexType k = 0; k < n_elems; ++k) {
543562
// Extract 3x3 mass_second_moment_of_inertia for element k
@@ -576,7 +595,7 @@ inline void compute_internal_torques(BlockType& block) {
576595
for (IndexType i = 0; i < 3; ++i) {
577596
#pragma omp parallel for simd schedule(static)
578597
for (IndexType k = 0; k < n_elems; ++k) {
579-
internal_torques(i, k) = bend_twist_couple_2D(i, k) +
598+
internal_torques(i, k) += // bend_twist_couple_2D(i, k) + // Note: internal_torques share bend_twist_couple_2D
580599
bend_twist_couple_3D(i, k) +
581600
shear_stretch_couple(i, k) +
582601
lagrangian_transport(i, k) +

0 commit comments

Comments
 (0)