feat: Support for adjoint simulations with no sources (zero gradients)

yaugenst-flex · yaugenst-flex · commit 9a35483cbf22 · 2024-11-11T10:55:26.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Differentiable `smooth_min`, `smooth_max`, and `least_squares` functions in `tidy3d.plugins.autograd`.
 - Differential operators `grad` and `value_and_grad` in `tidy3d.plugins.autograd` that behave similarly to the autograd operators but support auxiliary data via `aux_data=True` as well as differentiation w.r.t. `DataArray`.
 - `@scalar_objective` decorator in `tidy3d.plugins.autograd` that wraps objective functions to ensure they return a scalar value and performs additional checks to ensure compatibility of objective functions with autograd. Used by default in `tidy3d.plugins.autograd.value_and_grad` as well as `tidy3d.plugins.autograd.grad`.
+- Autograd support for simulations without adjoint sources in `run` as well as `run_async`: in this case the adjoint simulation is skipped and zero gradients are returned instead. This can occur when the gradient of the objective function does not depend on some simulations, for example when using `min` or `max` in the objective.
 
 
 ### Changed
@@ -25,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `xarray` 2024.10.0 compatibility for autograd.
 - Some failing examples in the expressions plugin documentation.
 - Inaccuracy in transforming gradients from edge to `PolySlab.vertices`.
+- Bug in `run_async` where an adjoint simulation would sometimes be assigned to the wrong forward simulation.
+
 
 ## [2.7.6] - 2024-10-30
 
diff --git a/tests/test_components/test_autograd.py b/tests/test_components/test_autograd.py
@@ -673,6 +673,29 @@ def task_name_fn(i: int, sign: int) -> str:
     print(f"avg(diff(objectives)) = {diff_objectives_num:.4f}")
 
 
+def test_run_zero_grad(use_emulated_run, log_capture):
+    """Test warning if no adjoint sim is run (no adjoint sources).
+
+    This checks the case where a simulation is still part of the computational
+    graph (i.e. the output technically depends on the simulation),
+    but no adjoint sources are placed because their amplitudes are zero and thus
+    no adjoint simulation is run.
+    """
+
+    # only needs to be checked for one monitor
+    fn_dict = get_functions(args[0][0], args[0][1])
+    make_sim = fn_dict["sim"]
+    postprocess = fn_dict["postprocess"]
+
+    def objective(*args):
+        sim = make_sim(*args)
+        sim_data = run(sim, task_name="adjoint_test", verbose=False)
+        return 0 * postprocess(sim_data)
+
+    with AssertLogLevel(log_capture, "WARNING", contains_str="no sources"):
+        grad = ag.grad(objective)(params0)
+
+
 @pytest.mark.parametrize("structure_key, monitor_key", args)
 def test_autograd_objective(use_emulated_run, structure_key, monitor_key):
     """Test an objective function through tidy3d autograd."""
@@ -717,8 +740,6 @@ def test_autograd_async(use_emulated_run, structure_key, monitor_key):
     task_names = {"1", "2", "3", "4"}
 
     def objective(*args):
-        """Objective function."""
-
         sims = {task_name: make_sim(*args) for task_name in task_names}
         batch_data = run_async(sims, verbose=False)
         value = 0.0
@@ -731,6 +752,51 @@ def objective(*args):
     assert anp.all(grad != 0.0), "some gradients are 0"
 
 
+@pytest.mark.parametrize("structure_key, monitor_key", args)
+def test_autograd_async_some_zero_grad(use_emulated_run, log_capture, structure_key, monitor_key):
+    """Test objective where only some simulations in batch have adjoint sources."""
+
+    fn_dict = get_functions(structure_key, monitor_key)
+    make_sim = fn_dict["sim"]
+    postprocess = fn_dict["postprocess"]
+
+    task_names = {"1", "2", "3", "4"}
+
+    def objective(*args):
+        sims = {task_name: make_sim(*args) for task_name in task_names}
+        batch_data = run_async(sims, verbose=False)
+        values = []
+        for _, sim_data in batch_data.items():
+            values.append(postprocess(sim_data))
+        return min(values)
+
+    # with AssertLogLevel(log_capture, "DEBUG", contains_str="no sources"):
+    val, grad = ag.value_and_grad(objective)(params0)
+
+    assert anp.all(grad != 0.0), "some gradients are 0"
+
+
+def test_autograd_async_all_zero_grad(use_emulated_run, log_capture):
+    """Test objective where no simulation in batch has adjoint sources."""
+
+    fn_dict = get_functions(args[0][0], args[0][1])
+    make_sim = fn_dict["sim"]
+    postprocess = fn_dict["postprocess"]
+
+    task_names = {"1", "2", "3", "4"}
+
+    def objective(*args):
+        sims = {task_name: make_sim(*args) for task_name in task_names}
+        batch_data = run_async(sims, verbose=False)
+        values = []
+        for _, sim_data in batch_data.items():
+            values.append(postprocess(sim_data))
+        return 0 * sum(values)
+
+    with AssertLogLevel(log_capture, "WARNING", contains_str="contains adjoint sources"):
+        grad = ag.grad(objective)(params0)
+
+
 def test_autograd_speed_num_structures(use_emulated_run):
     """Test an objective function through tidy3d autograd."""
 
diff --git a/tidy3d/components/data/sim_data.py b/tidy3d/components/data/sim_data.py
@@ -1033,8 +1033,10 @@ def split_original_fwd(self, num_mnts_original: int) -> Tuple[SimulationData, Si
         return sim_data_original, sim_data_fwd
 
     def make_adjoint_sim(
-        self, data_vjp_paths: set[tuple], adjoint_monitors: list[Monitor]
-    ) -> Simulation:
+        self,
+        data_vjp_paths: set[tuple],
+        adjoint_monitors: list[Monitor],
+    ) -> Simulation | None:
         """Make the adjoint simulation from the original simulation and the VJP-containing data."""
 
         sim_original = self.simulation
@@ -1045,6 +1047,9 @@ def make_adjoint_sim(
         for src_list in sources_adj_dict.values():
             adj_srcs += list(src_list)
 
+        if not any(adj_srcs):
+            return None
+
         adjoint_source_info = self.process_adjoint_sources(adj_srcs=adj_srcs)
 
         # grab boundary conditions with flipped Bloch vectors (for adjoint)
@@ -1087,14 +1092,6 @@ def make_adjoint_sources(self, data_vjp_paths: set[tuple]) -> dict[str, SourceTy
             )
             sources_adj_all[mnt_data.monitor.name] = sources_adj
 
-        if not any(src for _, src in sources_adj_all.items()):
-            raise ValueError(
-                "No adjoint sources created for this simulation. "
-                "This could indicate a bug in your setup, for example the objective function "
-                "output depending on a monitor that is not supported. If you encounter this error, "
-                "please examine your set up or contact customer support if you need more help."
-            )
-
         return sources_adj_all
 
     @property
diff --git a/tidy3d/components/source.py b/tidy3d/components/source.py
@@ -101,7 +101,7 @@ def frequency_range(self, num_fwidth: float = 4.0) -> FreqBound:
         """Frequency range within plus/minus ``num_fwidth * fwidth`` of the central frequency."""
 
     @abstractmethod
-    def end_time(self) -> float | None:
+    def end_time(self) -> Optional[float]:
         """Time after which the source is effectively turned off / close to zero amplitude."""
 
 
@@ -192,7 +192,7 @@ def amp_time(self, time: float) -> complex:
 
         return pulse_amp
 
-    def end_time(self) -> float | None:
+    def end_time(self) -> Optional[float]:
         """Time after which the source is effectively turned off / close to zero amplitude."""
 
         # TODO: decide if we should continue to return an end_time if the DC component remains
@@ -251,7 +251,7 @@ def amp_time(self, time: float) -> complex:
 
         return const * offset * oscillation * amp
 
-    def end_time(self) -> float | None:
+    def end_time(self) -> Optional[float]:
         """Time after which the source is effectively turned off / close to zero amplitude."""
         return None
 
@@ -420,7 +420,7 @@ def amp_time(self, time: float) -> complex:
 
         return offset * oscillation * amp * envelope
 
-    def end_time(self) -> float | None:
+    def end_time(self) -> Optional[float]:
         """Time after which the source is effectively turned off / close to zero amplitude."""
 
         if self.source_time_dataset is None:
diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py
@@ -13,7 +13,6 @@
 import tidy3d as td
 from tidy3d.components.autograd import AutogradFieldMap, get_static
 from tidy3d.components.autograd.derivative_utils import DerivativeInfo
-from tidy3d.components.data.sim_data import AdjointSourceInfo
 
 from ...core.s3utils import download_file, upload_file
 from ..asynchronous import DEFAULT_DATA_DIR
@@ -289,7 +288,10 @@ def run_async(
 
 
 def _run(
-    simulation: td.Simulation, task_name: str, local_gradient: bool = LOCAL_GRADIENT, **run_kwargs
+    simulation: td.Simulation,
+    task_name: str,
+    local_gradient: bool = LOCAL_GRADIENT,
+    **run_kwargs,
 ) -> td.SimulationData:
     """User-facing ``web.run`` function, compatible with ``autograd`` differentiation."""
 
@@ -323,7 +325,9 @@ def _run(
 
 
 def _run_async(
-    simulations: dict[str, td.Simulation], local_gradient: bool = LOCAL_GRADIENT, **run_async_kwargs
+    simulations: dict[str, td.Simulation],
+    local_gradient: bool = LOCAL_GRADIENT,
+    **run_async_kwargs,
 ) -> dict[str, td.SimulationData]:
     """User-facing ``web.run_async`` function, compatible with ``autograd`` differentiation."""
 
@@ -596,6 +600,15 @@ def vjp(data_fields_vjp: AutogradFieldMap) -> AutogradFieldMap:
             sim_fields_keys=sim_fields_keys,
         )
 
+        if sim_adj is None:
+            td.log.warning(
+                f"Adjoint simulation for task '{task_name}' contains no sources. "
+                "This can occur if the objective function does not depend on the "
+                "simulation's output. If this is unexpected, please review your "
+                "setup or contact customer support for assistance."
+            )
+            return {k: 0 * v for k, v in sim_fields_original.items()}
+
         # run adjoint simulation
         task_name_adj = str(task_name) + "_adjoint"
 
@@ -656,9 +669,10 @@ def _run_async_bwd(
     def vjp(data_fields_dict_vjp: dict[str, AutogradFieldMap]) -> dict[str, AutogradFieldMap]:
         """dJ/d{sim.traced_fields()} as a function of Function of dJ/d{data.traced_fields()}"""
 
-        task_names_adj = {task_name + "_adjoint" for task_name in task_names}
+        task_names_adj = [task_name + "_adjoint" for task_name in task_names]
 
         sims_adj = {}
+        sim_fields_vjp_dict = {}
         for task_name, task_name_adj in zip(task_names, task_names_adj):
             data_fields_vjp = data_fields_dict_vjp[task_name]
             sim_data_orig = sim_data_orig_dict[task_name]
@@ -669,48 +683,64 @@ def vjp(data_fields_dict_vjp: dict[str, AutogradFieldMap]) -> dict[str, Autograd
                 sim_data_orig=sim_data_orig,
                 sim_fields_keys=sim_fields_keys,
             )
+
+            if sim_adj is None:
+                td.log.debug(f"Adjoint simulation for task '{task_name}' contains no sources. ")
+                sim_fields_vjp_dict[task_name] = {
+                    k: 0 * v for k, v in sim_fields_original_dict[task_name].items()
+                }
             sims_adj[task_name_adj] = sim_adj
-            # TODO: handle case where no adjoint sources?
+
+        sims_to_run = {k: v for k, v in sims_adj.items() if v is not None}
+
+        if not sims_to_run:
+            td.log.warning(
+                "No simulation in batch contains adjoint sources and thus all gradients are zero. "
+                "This likely indicates an issue with your setup, consider double-checking or contact support."
+            )
+            return sim_fields_vjp_dict
+
+        task_names_adj = list(sims_to_run.keys())
+        task_names_fwd = [name[: -len("_adjoint")] for name in task_names_adj]  # drop exact suffix; rstrip() strips a char set
 
         if local_gradient:
             # run adjoint simulation
-            batch_data_adj, _ = _run_async_tidy3d(sims_adj, **run_async_kwargs)
+            batch_data_adj, _ = _run_async_tidy3d(sims_to_run, **run_async_kwargs)
 
-            sim_fields_vjp_dict = {}
-            for task_name, task_name_adj in zip(task_names, task_names_adj):
-                sim_data_adj = batch_data_adj[task_name_adj]
+            for task_name, task_name_adj in zip(task_names_fwd, task_names_adj):
                 sim_data_orig = sim_data_orig_dict[task_name]
                 sim_data_fwd = sim_data_fwd_dict[task_name]
                 sim_fields_keys = sim_fields_keys_dict[task_name]
 
+                sim_data_adj = batch_data_adj[task_name_adj]
+
                 sim_fields_vjp = postprocess_adj(
                     sim_data_adj=sim_data_adj,
                     sim_data_orig=sim_data_orig,
                     sim_data_fwd=sim_data_fwd,
                     sim_fields_keys=sim_fields_keys,
                 )
-                sim_fields_vjp_dict[task_name] = sim_fields_vjp
 
+                sim_fields_vjp_dict[task_name] = sim_fields_vjp
         else:
             parent_tasks = {}
-            for task_name_fwd, task_name_adj in zip(task_names, task_names_adj):
+            for task_name_fwd, task_name_adj in zip(task_names_fwd, task_names_adj):
                 task_id_fwd = aux_data_dict[task_name_fwd][AUX_KEY_FWD_TASK_ID]
                 parent_tasks[task_name_adj] = [task_id_fwd]
 
             run_async_kwargs["parent_tasks"] = parent_tasks
             run_async_kwargs["simulation_type"] = "autograd_bwd"
-            sims_adj = {
+            simulations = {
                 task_name: sim.updated_copy(simulation_type="autograd_bwd", deep=False)
-                for task_name, sim in sims_adj.items()
+                for task_name, sim in sims_to_run.items()
             }
             sim_fields_vjp_dict_adj_keys = _run_async_tidy3d_bwd(
-                simulations=sims_adj,
+                simulations=simulations,
                 **run_async_kwargs,
             )
 
             # swap adjoint task_names for original task_names
-            sim_fields_vjp_dict = {}
-            for task_name_fwd, task_name_adj in zip(task_names, task_names_adj):
+            for task_name_fwd, task_name_adj in zip(task_names_fwd, task_names_adj):
                 sim_fields_vjp_dict[task_name_fwd] = sim_fields_vjp_dict_adj_keys[task_name_adj]
 
         return sim_fields_vjp_dict
@@ -722,7 +752,7 @@ def setup_adj(
     data_fields_vjp: AutogradFieldMap,
     sim_data_orig: td.SimulationData,
     sim_fields_keys: list[tuple],
-) -> tuple[td.Simulation, AdjointSourceInfo]:
+) -> typing.Optional[td.Simulation]:
     """Construct an adjoint simulation from a set of data_fields for the VJP."""
 
     td.log.info("Running custom vjp (adjoint) pipeline.")
@@ -742,8 +772,11 @@ def setup_adj(
     ]
 
     sim_adj = sim_data_vjp.make_adjoint_sim(
-        data_vjp_paths=data_vjp_paths, adjoint_monitors=adjoint_monitors
+        data_vjp_paths=data_vjp_paths,
+        adjoint_monitors=adjoint_monitors,
     )
+    if sim_adj is None:
+        return sim_adj
 
     if _INSPECT_ADJOINT_FIELDS:
         adj_fld_mnt = td.FieldMonitor(