@@ -246,37 +246,67 @@ void structure_chunk::set_chi1inv(component c, material_function &medium,
246246 double trivial_val[3 ] = {0 , 0 , 0 };
247247 trivial_val[idiag] = 1.0 ;
248248 ivec shift1 (unit_ivec (gv.dim , component_direction (c)) * (ft == E_stuff ? 1 : -1 ));
249- // TODO: make this loop thread-safe and change to PLOOP_OVER_VOL
250- // Note that we *cannot* make it thread-safe if `medium` is not thread-safe,
251- // e.g. if it calls back to Python.
252- LOOP_OVER_VOL (gv, c, i) {
253- double chi1invrow[3 ], chi1invrow_offdiag[3 ];
254- IVEC_LOOP_ILOC (gv, here);
255- medium.eff_chi1inv_row (c, chi1invrow, gv.dV (here, smoothing_diameter), tol, maxeval);
256- medium.eff_chi1inv_row (c, chi1invrow_offdiag, gv.dV (here - shift1, smoothing_diameter), tol,
257- maxeval);
258- if (chi1inv[c][d0]) {
259- chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0 ] : chi1invrow_offdiag[0 ];
260- trivial[0 ] = trivial[0 ] && (chi1inv[c][d0][i] == trivial_val[0 ]);
261- }
262- if (chi1inv[c][d1]) {
263- chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1 ] : chi1invrow_offdiag[1 ];
264- trivial[1 ] = trivial[1 ] && (chi1inv[c][d1][i] == trivial_val[1 ]);
265- }
266- if (chi1inv[c][d2]) {
267- chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2 ] : chi1invrow_offdiag[2 ];
268- trivial[2 ] = trivial[2 ] && (chi1inv[c][d2][i] == trivial_val[2 ]);
249+ // Use OpenMP parallelization when the material function is thread-safe
250+ // (i.e., pure C++ geometry, not a Python callback). The trivial[] flags
251+ // need a reduction since each thread computes its local subset.
252+ if (medium.is_thread_safe ()) {
253+ bool trivial0 = true , trivial1 = true , trivial2 = true ;
254+ PLOOP_OVER_IVECS_C (gv, gv.little_corner () + gv.iyee_shift (c),
255+ gv.big_corner () + gv.iyee_shift (c), i,
256+ " omp parallel for collapse(3) reduction(&&:trivial0,trivial1,trivial2)" ) {
257+ double chi1invrow[3 ], chi1invrow_offdiag[3 ];
258+ IVEC_LOOP_ILOC (gv, here);
259+ medium.eff_chi1inv_row (c, chi1invrow, gv.dV (here, smoothing_diameter), tol, maxeval);
260+ medium.eff_chi1inv_row (c, chi1invrow_offdiag, gv.dV (here - shift1, smoothing_diameter), tol,
261+ maxeval);
262+ if (chi1inv[c][d0]) {
263+ chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0 ] : chi1invrow_offdiag[0 ];
264+ trivial0 = trivial0 && (chi1inv[c][d0][i] == trivial_val[0 ]);
265+ }
266+ if (chi1inv[c][d1]) {
267+ chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1 ] : chi1invrow_offdiag[1 ];
268+ trivial1 = trivial1 && (chi1inv[c][d1][i] == trivial_val[1 ]);
269+ }
270+ if (chi1inv[c][d2]) {
271+ chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2 ] : chi1invrow_offdiag[2 ];
272+ trivial2 = trivial2 && (chi1inv[c][d2][i] == trivial_val[2 ]);
273+ }
269274 }
275+ trivial[0 ] = trivial0;
276+ trivial[1 ] = trivial1;
277+ trivial[2 ] = trivial2;
278+ }
279+ else {
280+ // Serial path for non-thread-safe material functions (Python callbacks)
281+ LOOP_OVER_VOL (gv, c, i) {
282+ double chi1invrow[3 ], chi1invrow_offdiag[3 ];
283+ IVEC_LOOP_ILOC (gv, here);
284+ medium.eff_chi1inv_row (c, chi1invrow, gv.dV (here, smoothing_diameter), tol, maxeval);
285+ medium.eff_chi1inv_row (c, chi1invrow_offdiag, gv.dV (here - shift1, smoothing_diameter), tol,
286+ maxeval);
287+ if (chi1inv[c][d0]) {
288+ chi1inv[c][d0][i] = (d0 == dc) ? chi1invrow[0 ] : chi1invrow_offdiag[0 ];
289+ trivial[0 ] = trivial[0 ] && (chi1inv[c][d0][i] == trivial_val[0 ]);
290+ }
291+ if (chi1inv[c][d1]) {
292+ chi1inv[c][d1][i] = (d1 == dc) ? chi1invrow[1 ] : chi1invrow_offdiag[1 ];
293+ trivial[1 ] = trivial[1 ] && (chi1inv[c][d1][i] == trivial_val[1 ]);
294+ }
295+ if (chi1inv[c][d2]) {
296+ chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2 ] : chi1invrow_offdiag[2 ];
297+ trivial[2 ] = trivial[2 ] && (chi1inv[c][d2][i] == trivial_val[2 ]);
298+ }
270299
271- if (verbosity > 0 && (ipixel + 1 ) % 1000 == 0 &&
272- wall_time () > last_output_time + MEEP_MIN_OUTPUT_TIME) {
273- master_printf (" %s is %g%% done, %g s remaining\n " ,
274- use_anisotropic_averaging ? " subpixel-averaging" : " grid initialization" ,
275- ipixel * 100.0 / npixels,
276- (npixels - ipixel) * (wall_time () - last_output_time) / ipixel);
277- last_output_time = wall_time ();
300+ if (verbosity > 0 && (ipixel + 1 ) % 1000 == 0 &&
301+ wall_time () > last_output_time + MEEP_MIN_OUTPUT_TIME) {
302+ master_printf (" %s is %g%% done, %g s remaining\n " ,
303+ use_anisotropic_averaging ? " subpixel-averaging" : " grid initialization" ,
304+ ipixel * 100.0 / npixels,
305+ (npixels - ipixel) * (wall_time () - last_output_time) / ipixel);
306+ last_output_time = wall_time ();
307+ }
308+ ++ipixel;
278309 }
279- ++ipixel;
280310 }
281311 direction ds[3 ];
282312 ds[0 ] = d0;
0 commit comments