Improve the interpolated_param update validation to be more flexible about mixing tracers, JAX arrays and NumPy arrays

tamaranorman · Torax team · commit f68547eafa5a · 2026-01-08T06:00:45.000-08:00
PiperOrigin-RevId: 853700330
diff --git a/torax/_src/orchestration/step_function.py b/torax/_src/orchestration/step_function.py
@@ -142,16 +142,6 @@ def check_for_errors(
       post_processed_outputs: post_processing.PostProcessedOutputs,
   ) -> state.SimError:
     """Checks for errors in the simulation state."""
-    if self._runtime_params_provider.numerics.adaptive_dt:
-      if output_state.solver_numeric_outputs.solver_error_state == 1:
-        # Only check for min dt if the solver did not converge. Else we may have
-        # converged at a dt > min_dt just before we reach min_dt.
-        if (
-            output_state.dt
-            / self._runtime_params_provider.numerics.dt_reduction_factor
-            < self._runtime_params_provider.numerics.min_dt
-        ):
-          return state.SimError.REACHED_MIN_DT
 
     # Low-temperature collapse check
     if output_state.core_profiles.below_minimum_temperature(
@@ -162,8 +152,20 @@ def check_for_errors(
     state_error = output_state.check_for_errors()
     if state_error != state.SimError.NO_ERROR:
       return state_error
-    else:
-      return post_processed_outputs.check_for_errors()
+
+    post_processed_error = post_processed_outputs.check_for_errors()
+    if post_processed_error != state.SimError.NO_ERROR:
+      return post_processed_error
+
+    # Check if reached the minimum time step last - this is often caused by
+    # other errors so check those first to give more informative error messages.
+    if self._runtime_params_provider.numerics.adaptive_dt:
+      if output_state.solver_numeric_outputs.solver_error_state == 1:
+        # If using adaptive stepping and the solver did not converge we must
+        # have reached the minimum time step, so we can exit the simulation.
+        return state.SimError.REACHED_MIN_DT
+
+    return state.SimError.NO_ERROR
 
   @jax.jit
   def __call__(
@@ -298,8 +300,13 @@ def fixed_time_step(
     remaining_dt = dt
 
     def cond(args):
-      remaining_dt, _, _ = args
-      return remaining_dt > constants.CONSTANTS.eps
+      remaining_dt, prev_state, _ = args
+      if self.runtime_params_provider.numerics.adaptive_dt:
+        exit_min_dt = prev_state.solver_numeric_outputs.solver_error_state == 1
+      else:
+        exit_min_dt = False
+      return jnp.logical_and(
+          remaining_dt > constants.CONSTANTS.eps, ~exit_min_dt)
 
     def body(args):
       remaining_dt, prev_state, prev_post_processed = args
diff --git a/torax/_src/orchestration/tests/step_function_jax_transforms_test.py b/torax/_src/orchestration/tests/step_function_jax_transforms_test.py
@@ -0,0 +1,61 @@
+# Copyright 2024 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from absl.testing import absltest
+from absl.testing import parameterized
+import jax
+import jax.test_util as jtu
+from torax._src.config import config_loader
+from torax._src.orchestration import run_simulation
+from torax._src.torax_pydantic import interpolated_param_1d
+
+
+class StepFunctionTest(parameterized.TestCase):
+
+  @parameterized.parameters([
+      'basic_config',
+      'iterhybrid_predictor_corrector',
+  ])
+  def test_step_function_grad(self, config_name_no_py):
+    example_config_paths = config_loader.example_config_paths()
+    example_config_path = example_config_paths[config_name_no_py]
+    cfg = config_loader.build_torax_config_from_file(example_config_path)
+    (
+        sim_state,
+        post_processed_outputs,
+        step_fn,
+    ) = run_simulation.prepare_simulation(cfg)
+    params_provider = step_fn.runtime_params_provider
+    input_value = params_provider.profile_conditions.Ip.value
+
+    @jax.jit
+    def f(override_value):
+      ip_update = interpolated_param_1d.TimeVaryingScalarUpdate(
+          value=override_value
+      )
+      runtime_params_overrides = params_provider.update_provider(
+          lambda x: (x.profile_conditions.Ip,),
+          (ip_update,),
+      )
+      _, new_post_processed_outputs = step_fn(
+          sim_state,
+          post_processed_outputs,
+          runtime_params_overrides=runtime_params_overrides,
+      )
+      return new_post_processed_outputs.Q_fusion
+
+    jtu.check_grads(f, (input_value,), order=1, modes=('rev',))
+
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/torax/_src/orchestration/tests/step_function_test.py b/torax/_src/orchestration/tests/step_function_test.py
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import dataclasses
+import os
 from typing import Any
 
 from absl.testing import absltest
 from absl.testing import parameterized
 import chex
-import jax
 import jax.numpy as jnp
-import jax.test_util as jtu
 import numpy as np
 from torax._src import state
 from torax._src.config import config_loader
@@ -28,6 +27,7 @@
 from torax._src.orchestration import step_function
 from torax._src.output_tools import post_processing
 from torax._src.test_utils import default_configs
+from torax._src.test_utils import paths
 from torax._src.torax_pydantic import interpolated_param_1d
 from torax._src.torax_pydantic import model_config
 
@@ -266,6 +266,24 @@ def test_adaptive_step_with_smaller_passed_max_dt(self):
     )
     self.assertTrue(np.less_equal(output_state.dt, passed_max_dt))
 
+  def test_fixed_step_with_high_density_errors_and_does_not_hang(self):
+    # This test enforces that we exit the fixed step function early if we hit
+    # min_dt. If we don't do this then we risk hanging for a very long time as
+    # we stay at min_dt and the step never seems to make progress. This test
+    # ensures that we don't hang and instead fail early.
+    test_data_dir = paths.test_data_dir()
+    torax_config = config_loader.build_torax_config_from_file(
+        os.path.join(test_data_dir, 'test_iterhybrid_radiation_collapse.py')
+    )
+    sim_state, post_processed_outputs, step_fn = (
+        run_simulation.prepare_simulation(torax_config)
+    )
+    sim_state, post_processed_outputs = step_fn.fixed_time_step(
+        np.array(1.), sim_state, post_processed_outputs)
+
+    sim_error = step_fn.check_for_errors(sim_state, post_processed_outputs)
+    self.assertEqual(sim_error, state.SimError.NAN_DETECTED)
+
   def test_call_with_sawtooth_solver_smoke_test(self):
     """Smoke test for the boolean logic around the sawtooth solver.
 
@@ -338,49 +356,11 @@ def test_fixed_time_step_t_less_than_min_dt(self):
     )
     np.testing.assert_allclose(output_state.dt, 0.01, atol=1e-7)
 
-  @parameterized.parameters([
-      'basic_config',
-      'iterhybrid_predictor_corrector',
-  ])
-  def test_step_function_grad(self, config_name_no_py):
-    example_config_paths = config_loader.example_config_paths()
-    example_config_path = example_config_paths[config_name_no_py]
-    cfg = config_loader.build_torax_config_from_file(example_config_path)
-    (
-        sim_state,
-        post_processed_outputs,
-        step_fn,
-    ) = run_simulation.prepare_simulation(cfg)
-    params_provider = step_fn.runtime_params_provider
-    input_value = params_provider.profile_conditions.Ip.value
-
-    @jax.jit
-    def f(override_value):
-      ip_update = interpolated_param_1d.TimeVaryingScalarUpdate(
-          value=override_value
-      )
-      runtime_params_overrides = params_provider.update_provider(
-          lambda x: (x.profile_conditions.Ip,),
-          (ip_update,),
-      )
-      _, new_post_processed_outputs = step_fn(
-          sim_state,
-          post_processed_outputs,
-          runtime_params_overrides=runtime_params_overrides,
-      )
-      return new_post_processed_outputs.Q_fusion
-
-    jtu.check_grads(f, (input_value,), order=1, modes=('rev',))
-
-  @parameterized.parameters([
-      'iterhybrid_predictor_corrector',
-      'iterhybrid_rampup',
-  ])
-  def test_step_function_overrides(self, config_name_no_py):
-    example_config_paths = config_loader.example_config_paths()
-    example_config_path = example_config_paths[config_name_no_py]
-    raw_config = config_loader.import_module(example_config_path)['CONFIG']
-    cfg = config_loader.build_torax_config_from_file(example_config_path)
+  def test_step_function_overrides(self):
+    original_ip = 15e6
+    config_dict = default_configs.get_default_config_dict()
+    config_dict['profile_conditions']['Ip'] = original_ip
+    cfg = model_config.ToraxConfig.from_dict(config_dict)
     (
         sim_state,
         post_processed_outputs,
@@ -403,10 +383,7 @@ def test_step_function_overrides(self, config_name_no_py):
     )
 
     # Update the config itself and re-run the step.
-    doubled_ip = jax.tree_util.tree_map(
-        lambda x: x * 2.0, raw_config['profile_conditions']['Ip']
-    )
-    cfg.update_fields({'profile_conditions.Ip': doubled_ip})
+    cfg.update_fields({'profile_conditions.Ip': original_ip * 2.0})
     step_fn = run_simulation.make_step_fn(cfg)
     ref_state, ref_post_processed_outputs = step_fn(
         # Use original state and post-processed outputs as the initial value.
@@ -419,13 +396,9 @@ def test_step_function_overrides(self, config_name_no_py):
         override_post_processed_outputs, ref_post_processed_outputs
     )
 
-  @parameterized.parameters([
-      ('iterhybrid_rampup',),
-  ])
-  def test_step_function_geo_overrides(self, config_name_no_py):
-    example_config_paths = config_loader.example_config_paths()
-    example_config_path = example_config_paths[config_name_no_py]
-    cfg = config_loader.build_torax_config_from_file(example_config_path)
+  def test_step_function_geo_overrides(self):
+    config_dict = default_configs.get_default_config_dict()
+    cfg = model_config.ToraxConfig.from_dict(config_dict)
     (
         sim_state,
         post_processed_outputs,
diff --git a/torax/_src/torax_pydantic/interpolated_param_2d.py b/torax/_src/torax_pydantic/interpolated_param_2d.py
@@ -83,18 +83,22 @@ class TimeVaryingArrayUpdate:
 
   def __post_init__(self):
     """Consistency checks for the provided values."""
-    if not isinstance(self.value, type(self.rho_norm)):
+    if (self.rho_norm is None and self.value is not None) or (
+        self.rho_norm is not None and self.value is None
+    ):
       raise ValueError(
-          'If rho_norm is provided, value must also be provided. Got value:'
-          f' {type(self.value)}, rho_norm: {type(self.rho_norm)}'
+          'Either both or neither of rho_norm and value must be provided.'
       )
+
     if self.rho_norm is not None and self.value is not None:
       rho_norm_shape = self.rho_norm.shape
       if rho_norm_shape[0] != self.value.shape[1]:
         raise ValueError(
-            'rho_norm and value must have the same shape. Got rho_norm shape:'
-            f' {rho_norm_shape} and value shape: {self.value.shape}'
+            'rho_norm and value must have the same trailing dimension. '
+            f'Got rho_norm shape: {rho_norm_shape} and value shape: '
+            f'{self.value.shape}'
         )
+
     if self.value is not None and self.time is not None:
       if self.value.shape[0] != self.time.shape[0]:
         raise ValueError(
@@ -588,6 +592,7 @@ def _get_face_centers(nx: int, dx: float) -> np.ndarray:
 def _get_cell_centers(nx: int, dx: float) -> np.ndarray:
   return np.linspace(dx * 0.5, (nx - 0.5) * dx, nx)
 
+
 NonNegativeTimeVaryingArray: TypeAlias = typing_extensions.Annotated[
     TimeVaryingArray, pydantic.AfterValidator(_is_non_negative)
 ]
diff --git a/torax/_src/torax_pydantic/tests/interpolated_param_2d_test.py b/torax/_src/torax_pydantic/tests/interpolated_param_2d_test.py
@@ -470,6 +470,51 @@ def f(
         interpolated.get_value(t=0.0, grid_type='face_right'),
     )
 
+  def test_time_varying_array_update_validations_value_only(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        'Either both or neither of rho_norm and value must be provided.',
+    ):
+      interpolated_param_2d.TimeVaryingArrayUpdate(
+          value=np.array([[1.0]]), rho_norm=None
+      )
+
+  def test_time_varying_array_update_validations_rhonorm_only(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        'Either both or neither of rho_norm and value must be provided.',
+    ):
+      interpolated_param_2d.TimeVaryingArrayUpdate(
+          value=None, rho_norm=np.array([1.0])
+      )
+
+  def test_time_varying_array_update_validations_shape_mismatch(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        'rho_norm and value must have the same trailing dimension.',
+    ):
+      interpolated_param_2d.TimeVaryingArrayUpdate(
+          value=np.array([[1.0, 2.0], [3.0, 4.0]]), rho_norm=np.array([1.0])
+      )
+
+  def test_time_varying_array_update_validations_time_dimension_mismatch(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        'value and time arrays must have same leading dimension.',
+    ):
+      interpolated_param_2d.TimeVaryingArrayUpdate(
+          value=np.array([[1.0, 2.0], [3.0, 4.0]]),
+          rho_norm=np.array([0.0, 1.0]),
+          time=np.array([0.0]),
+      )
+
+  def test_allowed_mix_of_numpy_and_jax_arrays_for_update(self):
+    interpolated_param_2d.TimeVaryingArrayUpdate(
+        value=jnp.array([[1.0, 2.0], [3.0, 4.0]]),
+        rho_norm=np.array([0.0, 1.0]),
+        time=np.array([0.0, 1.0]),
+    )
+
   @parameterized.named_parameters(
       dict(
           testcase_name='update_values',
diff --git a/torax/tests/test_data/test_iterhybrid_radiation_collapse.py b/torax/tests/test_data/test_iterhybrid_radiation_collapse.py
@@ -31,6 +31,10 @@
     'W': W_frac,
 }
 CONFIG['plasma_composition']['Z_eff'] = 3.0
+
+# Remove QLKNN transport model to simplify step and avoid QLKNN load.
+CONFIG['transport'] = {}
+
 CONFIG['sources']['impurity_radiation'] = {
     'model_name': 'mavrin_fit',
 }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -31,6 +31,10 @@` |
| `31` | `31` | `'W': W_frac,` |
| `32` | `32` | `}` |
| `33` | `33` | `CONFIG['plasma_composition']['Z_eff'] = 3.0` |
| | `34` | `+` |
| | `35` | `+# Remove QLKNN transport model to simplify step and avoid QLKNN load.` |
| | `36` | `+CONFIG['transport'] = {}` |
| | `37` | `+` |
| `34` | `38` | `CONFIG['sources']['impurity_radiation'] = {` |
| `35` | `39` | `'model_name': 'mavrin_fit',` |
| `36` | `40` | `}` |