Skip to content

Commit d465929

Browse files
OpenMP parallelize grid initialization (set_epsilon) (#3166)
* OpenMP parallelize grid initialization (set_epsilon). The grid initialization loop in set_chi1inv iterates over all grid points to compute the inverse permittivity tensor. At high resolutions (e.g., 178M points at resolution 200), this takes minutes on a single core. Parallelize the loop with OpenMP (PLOOP_OVER_IVECS_C) when the material function is thread-safe (standard C++ geometry objects, not Python callbacks). The trivial[] flags use an OpenMP reduction. A new virtual method material_function::is_thread_safe() returns false by default (safe for Python callbacks). The geom_epsilon subclass overrides it to return true unless MATERIAL_USER materials are present. Measured speedup: 30.7s -> 3.2s at resolution 100 (9.7x with 22 cores). Set OMP_NUM_THREADS to control parallelism. * Fix lint errors. Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 72c42e2 commit d465929

File tree

4 files changed

+73
-28
lines changed

4 files changed

+73
-28
lines changed

src/anisotropic_averaging.cpp

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -246,37 +246,67 @@ void structure_chunk::set_chi1inv(component c, material_function &medium,
246246
double trivial_val[3] = {0, 0, 0};
247247
trivial_val[idiag] = 1.0;
248248
ivec shift1(unit_ivec(gv.dim, component_direction(c)) * (ft == E_stuff ? 1 : -1));
249-
// TODO: make this loop thread-safe and change to PLOOP_OVER_VOL
250-
// Note that we *cannot* make it thread-safe if `medium` is not thread-safe,
251-
// e.g. if it calls back to Python.
252-
LOOP_OVER_VOL(gv, c, i) {
253-
double chi1invrow[3], chi1invrow_offdiag[3];
254-
IVEC_LOOP_ILOC(gv, here);
255-
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
256-
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
257-
maxeval);
258-
if (chi1inv[c][d0]) {
259-
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
260-
trivial[0] = trivial[0] && (chi1inv[c][d0][i] == trivial_val[0]);
261-
}
262-
if (chi1inv[c][d1]) {
263-
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
264-
trivial[1] = trivial[1] && (chi1inv[c][d1][i] == trivial_val[1]);
265-
}
266-
if (chi1inv[c][d2]) {
267-
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
268-
trivial[2] = trivial[2] && (chi1inv[c][d2][i] == trivial_val[2]);
249+
// Use OpenMP parallelization when the material function is thread-safe
250+
// (i.e., pure C++ geometry, not a Python callback). The trivial[] flags
251+
// need a reduction since each thread computes its local subset.
252+
if (medium.is_thread_safe()) {
253+
bool trivial0 = true, trivial1 = true, trivial2 = true;
254+
PLOOP_OVER_IVECS_C(gv, gv.little_corner() + gv.iyee_shift(c),
255+
gv.big_corner() + gv.iyee_shift(c), i,
256+
"omp parallel for collapse(3) reduction(&&:trivial0,trivial1,trivial2)") {
257+
double chi1invrow[3], chi1invrow_offdiag[3];
258+
IVEC_LOOP_ILOC(gv, here);
259+
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
260+
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
261+
maxeval);
262+
if (chi1inv[c][d0]) {
263+
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
264+
trivial0 = trivial0 && (chi1inv[c][d0][i] == trivial_val[0]);
265+
}
266+
if (chi1inv[c][d1]) {
267+
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
268+
trivial1 = trivial1 && (chi1inv[c][d1][i] == trivial_val[1]);
269+
}
270+
if (chi1inv[c][d2]) {
271+
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
272+
trivial2 = trivial2 && (chi1inv[c][d2][i] == trivial_val[2]);
273+
}
269274
}
275+
trivial[0] = trivial0;
276+
trivial[1] = trivial1;
277+
trivial[2] = trivial2;
278+
}
279+
else {
280+
// Serial path for non-thread-safe material functions (Python callbacks)
281+
LOOP_OVER_VOL(gv, c, i) {
282+
double chi1invrow[3], chi1invrow_offdiag[3];
283+
IVEC_LOOP_ILOC(gv, here);
284+
medium.eff_chi1inv_row(c, chi1invrow, gv.dV(here, smoothing_diameter), tol, maxeval);
285+
medium.eff_chi1inv_row(c, chi1invrow_offdiag, gv.dV(here - shift1, smoothing_diameter), tol,
286+
maxeval);
287+
if (chi1inv[c][d0]) {
288+
chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0] : chi1invrow_offdiag[0];
289+
trivial[0] = trivial[0] && (chi1inv[c][d0][i] == trivial_val[0]);
290+
}
291+
if (chi1inv[c][d1]) {
292+
chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1] : chi1invrow_offdiag[1];
293+
trivial[1] = trivial[1] && (chi1inv[c][d1][i] == trivial_val[1]);
294+
}
295+
if (chi1inv[c][d2]) {
296+
chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2];
297+
trivial[2] = trivial[2] && (chi1inv[c][d2][i] == trivial_val[2]);
298+
}
270299

271-
if (verbosity > 0 && (ipixel + 1) % 1000 == 0 &&
272-
wall_time() > last_output_time + MEEP_MIN_OUTPUT_TIME) {
273-
master_printf("%s is %g%% done, %g s remaining\n",
274-
use_anisotropic_averaging ? "subpixel-averaging" : "grid initialization",
275-
ipixel * 100.0 / npixels,
276-
(npixels - ipixel) * (wall_time() - last_output_time) / ipixel);
277-
last_output_time = wall_time();
300+
if (verbosity > 0 && (ipixel + 1) % 1000 == 0 &&
301+
wall_time() > last_output_time + MEEP_MIN_OUTPUT_TIME) {
302+
master_printf("%s is %g%% done, %g s remaining\n",
303+
use_anisotropic_averaging ? "subpixel-averaging" : "grid initialization",
304+
ipixel * 100.0 / npixels,
305+
(npixels - ipixel) * (wall_time() - last_output_time) / ipixel);
306+
last_output_time = wall_time();
307+
}
308+
++ipixel;
278309
}
279-
++ipixel;
280310
}
281311
direction ds[3];
282312
ds[0] = d0;

src/meep.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,11 @@ class material_function {
523523
double tol = DEFAULT_SUBPIXEL_TOL,
524524
int maxeval = DEFAULT_SUBPIXEL_MAXEVAL);
525525

526+
// Returns true if eff_chi1inv_row and chi1p1 are safe to call from
527+
// multiple OpenMP threads. Override in subclasses that use Python
528+
// callbacks or other non-thread-safe state. Default: false (serial).
529+
virtual bool is_thread_safe() const { return false; }
530+
526531
/* polarizability sigma function: return c'th row of tensor */
527532
virtual void sigma_row(component c, double sigrow[3], const vec &r) {
528533
(void)c;

src/meepgeom.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,11 +658,15 @@ geom_epsilon::geom_epsilon(geometric_object_list g, material_type_list mlist,
658658
int length = g.num_items;
659659
geometry.num_items = length;
660660
geometry.items = new geometric_object[length];
661+
has_user_materials = false;
661662
for (int i = 0; i < length; i++) {
662663
geometric_object_copy(&g.items[i], &geometry.items[i]);
663664
geometry.items[i].material = new material_data();
664665
static_cast<material_data *>(geometry.items[i].material)
665666
->copy_from(*(material_data *)(g.items[i].material));
667+
if (static_cast<material_data *>(g.items[i].material)->which_subclass ==
668+
material_data::MATERIAL_USER)
669+
has_user_materials = true;
666670
}
667671

668672
extra_materials = mlist;
@@ -713,6 +717,7 @@ geom_epsilon::geom_epsilon(const geom_epsilon &geps1) {
713717
int length = geps1.geometry.num_items;
714718
geometry.num_items = length;
715719
geometry.items = new geometric_object[length];
720+
has_user_materials = geps1.has_user_materials;
716721
for (int i = 0; i < length; i++) {
717722
geometric_object_copy(&geps1.geometry.items[i], &geometry.items[i]);
718723
geometry.items[i].material = new material_data();

src/meepgeom.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ class geom_epsilon : public meep::material_function {
207207
virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
208208
double tol, int maxeval);
209209

210+
// Thread-safe for standard C++ geometry (no Python callbacks).
211+
// Only unsafe when MATERIAL_USER materials are present.
212+
virtual bool is_thread_safe() const { return !has_user_materials; }
213+
bool has_user_materials;
214+
210215
void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, const meep::volume &v,
211216
double tol, int maxeval, bool &fallback);
212217

0 commit comments

Comments (0)