Skip to content

Commit d465929

Browse files
OpenMP parallelize grid initialization (set_epsilon) (#3166)
* OpenMP parallelize grid initialization (set_epsilon). The grid initialization loop in set_chi1inv iterates over all grid points to compute the inverse permittivity tensor. At high resolutions (e.g., 178M points at resolution 200), this takes minutes on a single core. Parallelize the loop with OpenMP (PLOOP_OVER_IVECS_C) when the material function is thread-safe (standard C++ geometry objects, not Python callbacks). The trivial[] flags use an OpenMP reduction. A new virtual method material_function::is_thread_safe() returns false by default (safe for Python callbacks). The geom_epsilon subclass overrides it to return true unless MATERIAL_USER materials are present. Measured speedup: 30.7s -> 3.2s at resolution 100 (9.7x with 22 cores). Set OMP_NUM_THREADS to control parallelism. * Fix lint errors. Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 72c42e2 commit d465929

File tree

4 files changed

+73
-28
lines changed

4 files changed

+73
-28
lines changed

src/anisotropic_averaging.cpp

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -246,37 +246,67 @@ void structure_chunk::set_chi1inv(component c, material_function &medium,
246246
double trivial_val[3] = {0, 0, 0};
247247
trivial_val[idiag] = 1.0;
248248
ivec shift1(unit_ivec(gv.dim, component_direction(c)) * (ft == E_stuff ? 1 : -1));
249-
// TODO: make this loop thread-safe and change to PLOOP_OVER_VOL
250-
// Note that we *cannot* make it thread-safe if `medium` is not thread-safe,
251-
// e.g. if it calls back to Python.
252-
LOOP_OVER_VOL(gv, c, i) {
253-
double chi1invrow[3], chi1invrow_offdiag[3];
254-
IVEC_LOOP_ILOC(gv, here);
255-
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
256-
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
257-
maxeval);
258-
if (chi1inv[c][d0]) {
259-
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
260-
trivial[0] = trivial[0] && (chi1inv[c][d0][i] == trivial_val[0]);
261-
}
262-
if (chi1inv[c][d1]) {
263-
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
264-
trivial[1] = trivial[1] && (chi1inv[c][d1][i] == trivial_val[1]);
265-
}
266-
if (chi1inv[c][d2]) {
267-
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
268-
trivial[2] = trivial[2] && (chi1inv[c][d2][i] == trivial_val[2]);
249+
// Use OpenMP parallelization when the material function is thread-safe
250+
// (i.e., pure C++ geometry, not a Python callback). The trivial[] flags
251+
// need a reduction since each thread computes its local subset.
252+
if (medium.is_thread_safe()) {
253+
bool trivial0 = true, trivial1 = true, trivial2 = true;
254+
PLOOP_OVER_IVECS_C(gv, gv.little_corner() + gv.iyee_shift(c),
255+
gv.big_corner() + gv.iyee_shift(c), i,
256+
"omp parallel for collapse(3) reduction(&&:trivial0,trivial1,trivial2)") {
257+
double chi1invrow[3], chi1invrow_offdiag[3];
258+
IVEC_LOOP_ILOC(gv, here);
259+
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
260+
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
261+
maxeval);
262+
if (chi1inv[c][d0]) {
263+
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
264+
trivial0 = trivial0 && (chi1inv[c][d0][i] == trivial_val[0]);
265+
}
266+
if (chi1inv[c][d1]) {
267+
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
268+
trivial1 = trivial1 && (chi1inv[c][d1][i] == trivial_val[1]);
269+
}
270+
if (chi1inv[c][d2]) {
271+
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
272+
trivial2 = trivial2 && (chi1inv[c][d2][i] == trivial_val[2]);
273+
}
269274
}
275+
trivial[0] = trivial0;
276+
trivial[1] = trivial1;
277+
trivial[2] = trivial2;
278+
}
279+
else {
280+
// Serial path for non-thread-safe material functions (Python callbacks)
281+
LOOP_OVER_VOL(gv, c, i) {
282+
double chi1invrow[3], chi1invrow_offdiag[3];
283+
IVEC_LOOP_ILOC(gv, here);
284+
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
285+
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
286+
maxeval);
287+
if (chi1inv[c][d0]) {
288+
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
289+
trivial[0] = trivial[0] && (chi1inv[c][d0][i] == trivial_val[0]);
290+
}
291+
if (chi1inv[c][d1]) {
292+
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
293+
trivial[1] = trivial[1] && (chi1inv[c][d1][i] == trivial_val[1]);
294+
}
295+
if (chi1inv[c][d2]) {
296+
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
297+
trivial[2] = trivial[2] && (chi1inv[c][d2][i] == trivial_val[2]);
298+
}
270299

271-
if (verbosity > 0 && (ipixel + 1) % 1000 == 0 &&
272-
wall_time() > last_output_time + MEEP_MIN_OUTPUT_TIME) {
273-
master_printf("%s is %g%% done, %g s remaining\n",
274-
use_anisotropic_averaging ? "subpixel-averaging" : "grid initialization",
275-
ipixel * 100.0 / npixels,
276-
(npixels - ipixel) * (wall_time() - last_output_time) / ipixel);
277-
last_output_time = wall_time();
300+
if (verbosity > 0 && (ipixel + 1) % 1000 == 0 &&
301+
wall_time() > last_output_time + MEEP_MIN_OUTPUT_TIME) {
302+
master_printf("%s is %g%% done, %g s remaining\n",
303+
use_anisotropic_averaging ? "subpixel-averaging" : "grid initialization",
304+
ipixel * 100.0 / npixels,
305+
(npixels - ipixel) * (wall_time() - last_output_time) / ipixel);
306+
last_output_time = wall_time();
307+
}
308+
++ipixel;
278309
}
279-
++ipixel;
280310
}
281311
direction ds[3];
282312
ds[0] = d0;

src/meep.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,11 @@ class material_function {
523523
double tol = DEFAULT_SUBPIXEL_TOL,
524524
int maxeval = DEFAULT_SUBPIXEL_MAXEVAL);
525525

526+
// Returns true if eff_chi1inv_row and chi1p1 are safe to call from
527+
// multiple OpenMP threads. Override in subclasses that use Python
528+
// callbacks or other non-thread-safe state. Default: false (serial).
529+
virtual bool is_thread_safe() const { return false; }
530+
526531
/* polarizability sigma function: return c'th row of tensor */
527532
virtual void sigma_row(component c, double sigrow[3], const vec &r) {
528533
(void)c;

src/meepgeom.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,11 +658,15 @@ geom_epsilon::geom_epsilon(geometric_object_list g, material_type_list mlist,
658658
int length = g.num_items;
659659
geometry.num_items = length;
660660
geometry.items = new geometric_object[length];
661+
has_user_materials = false;
661662
for (int i = 0; i < length; i++) {
662663
geometric_object_copy(&g.items[i], &geometry.items[i]);
663664
geometry.items[i].material = new material_data();
664665
static_cast<material_data *>(geometry.items[i].material)
665666
->copy_from(*(material_data *)(g.items[i].material));
667+
if (static_cast<material_data *>(g.items[i].material)->which_subclass ==
668+
material_data::MATERIAL_USER)
669+
has_user_materials = true;
666670
}
667671

668672
extra_materials = mlist;
@@ -713,6 +717,7 @@ geom_epsilon::geom_epsilon(const geom_epsilon &geps1) {
713717
int length = geps1.geometry.num_items;
714718
geometry.num_items = length;
715719
geometry.items = new geometric_object[length];
720+
has_user_materials = geps1.has_user_materials;
716721
for (int i = 0; i < length; i++) {
717722
geometric_object_copy(&geps1.geometry.items[i], &geometry.items[i]);
718723
geometry.items[i].material = new material_data();

src/meepgeom.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ class geom_epsilon : public meep::material_function {
207207
virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
208208
double tol, int maxeval);
209209

210+
// Thread-safe for standard C++ geometry (no Python callbacks).
211+
// Only unsafe when MATERIAL_USER materials are present.
212+
virtual bool is_thread_safe() const { return !has_user_materials; }
213+
bool has_user_materials;
214+
210215
void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, const meep::volume &v,
211216
double tol, int maxeval, bool &fallback);
212217

0 commit comments

Comments (0)