added a tiny bit of documentation

Grutschus · Grutschus · commit be8cffd2ddad · 2025-11-13T13:56:26.000+01:00
diff --git a/examples/advanced_example_config.yaml b/examples/advanced_example_config.yaml
@@ -59,6 +59,8 @@ seml:
   output_dir: logs
   project_root_dir: .
   description: "An advanced example configuration. We can also use variable interpolation here: ${config.model.model_type}"
+  reschedule_timeout: 300 # The amount of time (in seconds) left on the job at which SEML will try to reschedule unfinished experiments.
+  # Note that you have to implement a `reschedule_hook` to use this feature.
 
 slurm:
   - experiments_per_job: 1
diff --git a/examples/advanced_example_experiment.py b/examples/advanced_example_experiment.py
@@ -147,6 +147,9 @@ def train(self, patience, num_epochs):
         # everything is set up
         for e in range(num_epochs):
             # simulate training
+
+            # Call the reschedule hook on every iteration so that it can check whether rescheduling is needed.
+            reschedule_hook(model_weights={}, step=e)
             continue
         results = {
             "test_acc": 0.5 + 0.3 * np.random.randn(),
@@ -165,6 +168,22 @@ def get_experiment(init_all=False):
     return experiment
 
 
+# This function will be called when a reschedule is triggered.
+# It should save the current state of the experiment and return a
+# dictionary that may be used to update the configuration upon rescheduling.
+# You are responsible for implementing the actual saving and loading of the
+# experiment state based on the updated config.
+@ex.reschedule_hook
+def reschedule_hook(model_weights, step, **kwargs):
+    # Here you would save the current state of the experiment
+    # and return any necessary configuration updates.
+
+    # !!! You will need to call this function regularly from within your training loop
+    # to check if rescheduling is needed.
+    # Pass everything you need to store your state to this function.
+    return {"checkpoint_path": "path/to/saved/checkpoint"}
+
+
 # This function will be called by default. Note that we could in principle manually pass an experiment instance,
 # e.g., obtained by loading a model from the database or by calling this from a Jupyter notebook.
 @ex.automain