From 72dedc894d862cb3033062e00b55ab16981bee62 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:11:29 +0200 Subject: [PATCH 01/12] Added plots for solution of Lorenz with and without faults --- pySDC/projects/Resilience/fault_stats.py | 6 +- pySDC/projects/Resilience/paper_plots.py | 110 +++++++++++++++++++++-- pySDC/projects/Resilience/strategies.py | 4 +- 3 files changed, 110 insertions(+), 10 deletions(-) diff --git a/pySDC/projects/Resilience/fault_stats.py b/pySDC/projects/Resilience/fault_stats.py index f28aa11746..0068c102fe 100644 --- a/pySDC/projects/Resilience/fault_stats.py +++ b/pySDC/projects/Resilience/fault_stats.py @@ -1654,11 +1654,11 @@ def compare_adaptivity_modes(): def main(): kwargs = { - 'prob': run_AC, + 'prob': run_Lorenz, 'num_procs': 1, 'mode': 'default', - 'runs': 2000, - 'reload': True, + 'runs': 6000, + 'reload': False, **parse_args(), } diff --git a/pySDC/projects/Resilience/paper_plots.py b/pySDC/projects/Resilience/paper_plots.py index e3ad6d435e..68f00b57a2 100644 --- a/pySDC/projects/Resilience/paper_plots.py +++ b/pySDC/projects/Resilience/paper_plots.py @@ -19,6 +19,7 @@ DIRKStrategy, ERKStrategy, AdaptivityPolynomialError, + cmap, ) from pySDC.helpers.plot_helper import setup_mpl, figsize_by_journal from pySDC.helpers.stats_helper import get_sorted @@ -388,6 +389,105 @@ def plot_fault_vdp(bit=0): # pragma: no cover savefig(fig, f'fault_bit_{bit}') +def plot_fault_Lorenz(bit=0): # pragma: no cover + """ + Make a plot showing the impact of a fault on the Lorenz attractor without any resilience. + The faults are inserted in the last iteration in the last node in x such that you can best see the impact. + + Args: + bit (int): The bit that you want to flip + + Returns: + None + """ + from pySDC.projects.Resilience.fault_stats import ( + FaultStats, + BaseStrategy, + ) + from pySDC.projects.Resilience.hook import LogData + + stats_analyser = FaultStats( + prob=run_Lorenz, + strategies=[BaseStrategy()], + faults=[False, True], + reload=True, + recovery_thresh=1.1, + num_procs=1, + mode='combination', + ) + + strategy = BaseStrategy() + + my_setup_mpl() + fig, ax = plt.subplots(figsize=figsize_by_journal(JOURNAL, 0.8, 0.5)) + colors = ['grey', strategy.color, 'magenta'] + ls = ['--', '-'] + markers = [None, strategy.marker] + do_faults = [False, True] + superscripts = ['*', ''] + labels = ['x', 'x'] + + run = 19 + 20 * bit + + for i in range(len(do_faults)): + stats, controller, Tend = stats_analyser.single_run( + strategy=BaseStrategy(), + run=run, + faults=do_faults[i], + hook_class=[LogData], + ) + u = get_sorted(stats, type='u') + faults = get_sorted(stats, type='bitflip') + ax.plot( + [me[0] for me in u], + [me[1][0] for me in u], + ls=ls[i], + color=colors[i], + label=rf'${{{labels[i]}}}^{{{superscripts[i]}}}$', + marker=markers[i], + markevery=120, + ) + for idx in range(len(faults)): + ax.axvline(faults[idx][0], color='black', label='Fault', ls=':') + print( + f'Fault at t={faults[idx][0]:.2e}, iter={faults[idx][1][1]}, node={faults[idx][1][2]}, space={faults[idx][1][3]}, bit={faults[idx][1][4]}' + ) + ax.set_title(f'Fault in bit {faults[idx][1][4]}') + + ax.legend(frameon=True, loc='lower left') + ax.set_xlabel(r'$t$') + # plt.show() + savefig(fig, f'fault_bit_{bit}') + + +def plot_Lorenz_solution(): # pragma: no cover + my_setup_mpl() + + fig, axs = plt.subplots(1, 2, figsize=figsize_by_journal(JOURNAL, 1, 0.4), sharex=True) + + strategy = BaseStrategy() + desc = strategy.get_custom_description(run_Lorenz, num_procs=1) + stats, controller, _ = run_Lorenz(custom_description=desc, Tend=strategy.get_Tend(run_Lorenz)) + + u = get_sorted(stats, recomputed=False, type='u') + + axs[0].plot([me[1][0] for me in u], [me[1][2] for me in u]) + axs[0].set_ylabel('$z$') + axs[0].set_xlabel('$x$') + + axs[1].plot([me[1][0] for me in u], [me[1][1] for me in u]) + axs[1].set_ylabel('$y$') + axs[1].set_xlabel('$x$') + + for ax in axs: + ax.set_box_aspect(1.0) + + path = 'data/paper/Lorenz_sol.pdf' + fig.savefig(path, bbox_inches='tight', transparent=True, dpi=200) + + plt.show() + + def plot_quench_solution(): # pragma: no cover """ Plot the solution of Quench problem over time @@ -596,10 +696,9 @@ def make_plots_for_adaptivity_paper(): # pragma: no cover def make_plots_for_resilience_paper(): # pragma: no cover - plot_recovery_rate(get_stats(run_vdp)) - plot_fault_vdp(0) - plot_fault_vdp(13) - compare_recovery_rate_problems(num_procs=1, strategy_type='SDC') + # plot_Lorenz_solution() + # plot_fault_Lorenz(0) + plot_fault_Lorenz(20) def make_plots_for_notes(): # pragma: no cover @@ -618,4 +717,5 @@ def make_plots_for_notes(): # pragma: no cover # make_plots_for_notes() # make_plots_for_SIAM_CSE23() # make_plots_for_TIME_X_website() - make_plots_for_adaptivity_paper() + # make_plots_for_adaptivity_paper() + make_plots_for_resilience_paper() diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index e080e90cc3..1904f66011 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -127,7 +127,7 @@ def get_fault_args(self, problem, num_procs): elif problem.__name__ == "run_quench": args['time'] = 41.0 elif problem.__name__ == "run_Lorenz": - args['time'] = 0.3 + args['time'] = 10 elif problem.__name__ == "run_AC": args['time'] = 1e-2 @@ -199,7 +199,7 @@ def get_Tend(cls, problem, num_procs=1): elif problem.__name__ == "run_piline": return 20.0 elif problem.__name__ == "run_Lorenz": - return 1.5 + return 20 elif problem.__name__ == "run_Schroedinger": return 1.0 elif problem.__name__ == "run_quench": From d7c03f6240f50121fc84c17b0bf38ce33b8b16ce Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Tue, 3 Sep 2024 20:29:50 +0200 Subject: [PATCH 02/12] Bugfix --- pySDC/projects/Resilience/fault_stats.py | 4 +++- pySDC/projects/Resilience/paper_plots.py | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pySDC/projects/Resilience/fault_stats.py b/pySDC/projects/Resilience/fault_stats.py index 0068c102fe..db54efa623 100644 --- a/pySDC/projects/Resilience/fault_stats.py +++ b/pySDC/projects/Resilience/fault_stats.py @@ -270,8 +270,10 @@ def generate_stats(self, strategy=None, runs=1000, reload=True, faults=True, com dat['bit'][i] = faults_run[0][1][4] dat['target'][i] = faults_run[0][1][5] dat['rank'][i] = faults_run[0][1][6] + if crash: print('Code crashed!') + dat['error'][i] = np.inf continue # record the rest of the data @@ -873,7 +875,7 @@ def plot_things_per_things( args=None, strategies=None, name=None, - store=True, + store=False, ax=None, fig=None, plotting_args=None, diff --git a/pySDC/projects/Resilience/paper_plots.py b/pySDC/projects/Resilience/paper_plots.py index 68f00b57a2..73552d6318 100644 --- a/pySDC/projects/Resilience/paper_plots.py +++ b/pySDC/projects/Resilience/paper_plots.py @@ -456,7 +456,6 @@ def plot_fault_Lorenz(bit=0): # pragma: no cover ax.legend(frameon=True, loc='lower left') ax.set_xlabel(r'$t$') - # plt.show() savefig(fig, f'fault_bit_{bit}') @@ -485,8 +484,6 @@ def plot_Lorenz_solution(): # pragma: no cover path = 'data/paper/Lorenz_sol.pdf' fig.savefig(path, bbox_inches='tight', transparent=True, dpi=200) - plt.show() - def plot_quench_solution(): # pragma: no cover """ @@ -698,7 +695,12 @@ def make_plots_for_adaptivity_paper(): # pragma: no cover def make_plots_for_resilience_paper(): # pragma: no cover # plot_Lorenz_solution() # plot_fault_Lorenz(0) - plot_fault_Lorenz(20) + # plot_fault_Lorenz(20) + # compare_recovery_rate_problems() + # fig, axs = plt.subplots(1, 2, figsize=figsize_by_journal(JOURNAL, 1, 0.8)) + # plot_recovery_rate_recoverable_only(get_stats(run_Lorenz), fig, axs[1]) + plot_recovery_rate(get_stats(run_Lorenz)) + # savefig(fig, 'recovery_rate') def make_plots_for_notes(): # pragma: no cover From 0b4a79ea21ac76e4d05086952841cf9310a52b9a Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 4 Sep 2024 09:35:02 +0200 Subject: [PATCH 03/12] Fixed resilience parameters for Allen-Cahn --- pySDC/projects/Resilience/fault_stats.py | 22 +++++++++++++------ pySDC/projects/Resilience/paper_plots.py | 11 +++++++--- pySDC/projects/Resilience/strategies.py | 8 +++++-- .../Resilience/tests/test_fault_injection.py | 2 +- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/pySDC/projects/Resilience/fault_stats.py b/pySDC/projects/Resilience/fault_stats.py index db54efa623..f74b184180 100644 --- a/pySDC/projects/Resilience/fault_stats.py +++ b/pySDC/projects/Resilience/fault_stats.py @@ -932,7 +932,7 @@ def plot_things_per_things( return None def plot_recovery_thresholds( - self, strategies=None, thresh_range=None, ax=None, mask=None, **kwargs + self, strategies=None, thresh_range=None, ax=None, recoverable_only=False, **kwargs ): # pragma: no cover ''' Plot the recovery rate for a range of thresholds @@ -941,7 +941,6 @@ def plot_recovery_thresholds( strategies (list): List of the strategies you want to plot, if None, all will be plotted thresh_range (list): thresholds for deciding whether to accept as recovered ax (Matplotlib.axes): Somewhere to plot - mask (Numpy.ndarray of shape (n)): The mask you want to know about Returns: None @@ -960,16 +959,21 @@ def plot_recovery_thresholds( fault_free = self.load(strategy=strategy, faults=False) with_faults = self.load(strategy=strategy, faults=True) + if recoverable_only: + recoverable_mask = self.get_fixable_faults_only(strategy) + else: + recoverable_mask = self.get_mask() + for thresh_idx in range(len(thresh_range)): rec_mask = self.get_mask( strategy=strategy, key='error', val=(thresh_range[thresh_idx] * fault_free['error'].mean()), op='gt', - old_mask=mask, + old_mask=recoverable_mask, ) rec_rates[strategy_idx][thresh_idx] = 1.0 - len(with_faults['error'][rec_mask]) / len( - with_faults['error'] + with_faults['error'][recoverable_mask] ) ax.plot( @@ -1656,10 +1660,10 @@ def compare_adaptivity_modes(): def main(): kwargs = { - 'prob': run_Lorenz, + 'prob': run_AC, 'num_procs': 1, 'mode': 'default', - 'runs': 6000, + 'runs': 5000, 'reload': False, **parse_args(), } @@ -1684,7 +1688,7 @@ def main(): stats_path='data/stats-jusuf', **kwargs, ) - stats_analyser.run_stats_generation(runs=kwargs['runs'], step=12) + stats_analyser.run_stats_generation(runs=kwargs['runs'], step=25) if MPI.COMM_WORLD.rank > 0: # make sure only one rank accesses the data return None @@ -1692,6 +1696,10 @@ def main(): stats_analyser.get_recovered() mask = None + # mask = + stats_analyser.scrutinize(HotRodStrategy(), run=1, faults=False) + return None + # stats_analyser.compare_strategies() stats_analyser.plot_things_per_things( 'recovered', 'node', False, op=stats_analyser.rec_rate, mask=mask, args={'ylabel': 'recovery rate'} diff --git a/pySDC/projects/Resilience/paper_plots.py b/pySDC/projects/Resilience/paper_plots.py index 73552d6318..da05295242 100644 --- a/pySDC/projects/Resilience/paper_plots.py +++ b/pySDC/projects/Resilience/paper_plots.py @@ -643,6 +643,12 @@ def work_precision(): # pragma: no cover all_problems(**{**all_params, 'work_key': 'param'}, mode='compare_strategies') +def plot_recovery_rate_per_acceptance_threshold(problem): # pragma no cover + stats_analyser = get_stats(problem) + + stats_analyser.plot_recovery_thresholds(thresh_range=np.linspace(0.5, 1.5, 1000), recoverable_only=True) + + def make_plots_for_TIME_X_website(): # pragma: no cover global JOURNAL, BASE_PATH JOURNAL = 'JSC_beamer' @@ -697,10 +703,9 @@ def make_plots_for_resilience_paper(): # pragma: no cover # plot_fault_Lorenz(0) # plot_fault_Lorenz(20) # compare_recovery_rate_problems() - # fig, axs = plt.subplots(1, 2, figsize=figsize_by_journal(JOURNAL, 1, 0.8)) - # plot_recovery_rate_recoverable_only(get_stats(run_Lorenz), fig, axs[1]) plot_recovery_rate(get_stats(run_Lorenz)) - # savefig(fig, 'recovery_rate') + plot_recovery_rate_per_acceptance_threshold(run_Lorenz) + plt.show() def make_plots_for_notes(): # pragma: no cover diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index 1904f66011..422d27185e 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -434,6 +434,8 @@ def get_custom_description_for_faults(self, problem, *args, **kwargs): desc = self.get_custom_description(problem, *args, **kwargs) if problem.__name__ == "run_quench": desc['level_params']['dt'] = 5.0 + elif problem.__name__ == "run_AC": + desc['level_params']['dt'] = 8e-5 return desc def get_reference_value(self, problem, key, op, num_procs=1): @@ -590,6 +592,8 @@ def get_custom_description_for_faults(self, problem, num_procs, *args, **kwargs) if problem.__name__ == "run_quench": desc['level_params']['dt'] = 1.1e1 desc['convergence_controllers'][Adaptivity]['e_tol'] = 1e-6 + elif problem.__name__ == "run_AC": + desc['convergence_controllers'][Adaptivity]['e_tol'] = 1e-5 return desc @@ -834,7 +838,7 @@ def get_custom_description_for_faults(self, problem, *args, **kwargs): if problem.__name__ == 'run_quench': desc['level_params']['dt'] = 5.0 elif problem.__name__ == 'run_AC': - desc['level_params']['dt'] = 0.6 * desc['problem_params']['eps'] ** 2 + desc['level_params']['dt'] = 5e-4 return desc def get_reference_value(self, problem, key, op, num_procs=1): @@ -949,7 +953,7 @@ def get_custom_description(self, problem, num_procs): 'level_params': {}, } if problem.__name__ == "run_AC": - custom_description['level_params']['dt'] = 0.8 * base_params['problem_params']['eps'] ** 2 / 8.0 + custom_description['level_params']['dt'] = 8e-5 return merge_descriptions(base_params, custom_description) def get_custom_description_for_faults(self, problem, *args, **kwargs): diff --git a/pySDC/projects/Resilience/tests/test_fault_injection.py b/pySDC/projects/Resilience/tests/test_fault_injection.py index c44ea38307..beaf8e7141 100644 --- a/pySDC/projects/Resilience/tests/test_fault_injection.py +++ b/pySDC/projects/Resilience/tests/test_fault_injection.py @@ -200,7 +200,7 @@ def test_fault_stats(numprocs): index = stats.get_index(mask=fixable_mask) assert all(fixable_mask[:-1] == [False, True, False]), "Error in generating mask of fixable faults" - assert all(index == [1, 3]), "Error when converting to index" + assert all(index == [1]), "Error when converting to index" combinations = np.array(stats.get_combination_counts(dat, keys=['bit'], mask=fixable_mask)) assert all(combinations == [1.0, 1.0]), "Error when counting combinations" From 357973e5b18694d5aebbf22e0df9b4b89e018849 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 4 Sep 2024 10:01:50 +0200 Subject: [PATCH 04/12] Changed parameters for Lorenz --- pySDC/projects/Resilience/fault_stats.py | 2 +- pySDC/projects/Resilience/strategies.py | 26 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pySDC/projects/Resilience/fault_stats.py b/pySDC/projects/Resilience/fault_stats.py index f74b184180..5eee716822 100644 --- a/pySDC/projects/Resilience/fault_stats.py +++ b/pySDC/projects/Resilience/fault_stats.py @@ -1660,7 +1660,7 @@ def compare_adaptivity_modes(): def main(): kwargs = { - 'prob': run_AC, + 'prob': run_Lorenz, 'num_procs': 1, 'mode': 'default', 'runs': 5000, diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index 422d27185e..e0f5cbe06c 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -240,7 +240,7 @@ def get_base_parameters(self, problem, num_procs=1): elif problem.__name__ == "run_Lorenz": custom_description['step_params'] = {'maxiter': 5} - custom_description['level_params'] = {'dt': 1e-2} + custom_description['level_params'] = {'dt': 1e-3} custom_description['problem_params'] = {'stop_at_nan': False} elif problem.__name__ == "run_Schroedinger": custom_description['step_params'] = {'maxiter': 5} @@ -453,9 +453,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 2136 + return 243380 elif key == 'e_global_post_run' and op == max: - return 9.256926357892326e-06 + return 4.68e-04 super().get_reference_value(problem, key, op, num_procs) @@ -529,7 +529,7 @@ def get_custom_description(self, problem, num_procs): elif problem.__name__ == "run_vdp": e_tol = 2e-5 elif problem.__name__ == "run_Lorenz": - e_tol = 2e-5 + e_tol = 1e-7 elif problem.__name__ == "run_Schroedinger": e_tol = 4e-7 elif problem.__name__ == "run_quench": @@ -578,9 +578,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == 'run_Lorenz': if key == 'work_newton' and op == sum: - return 1369 + return 44635 elif key == 'e_global_post_run' and op == max: - return 9.364841517367495e-06 + return 2.82e-04 super().get_reference_value(problem, key, op, num_procs) @@ -856,9 +856,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 2392 + return 243380 elif key == 'e_global_post_run' and op == max: - return 4.808610118089973e-06 + return 4.68e-04 super().get_reference_value(problem, key, op, num_procs) @@ -977,9 +977,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 2329 + return 243380 elif key == 'e_global_post_run' and op == max: - return 9.256926357892326e-06 + return 4.68e-04 super().get_reference_value(problem, key, op, num_procs) @@ -1882,7 +1882,7 @@ def get_custom_description(self, problem, num_procs): elif problem.__name__ == "run_piline": e_tol = 1e-7 elif problem.__name__ == "run_Lorenz": - e_tol = 7e-4 + e_tol = 2e-4 elif problem.__name__ == "run_Schroedinger": e_tol = 3e-5 elif problem.__name__ == "run_quench": @@ -1966,9 +1966,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 2124 + return 25404 elif key == 'e_global_post_run' and op == max: - return 8.484321512014503e-08 + return 4.34e-04 super().get_reference_value(problem, key, op, num_procs) From 9c6857e8eae24fb54096c38920647c6fcfeed1e3 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:18:16 +0200 Subject: [PATCH 05/12] Fixed rest of reference values for test --- pySDC/projects/Resilience/paper_plots.py | 12 ++-- pySDC/projects/Resilience/strategies.py | 56 +++++++++---------- .../Resilience/tests/test_strategies.py | 1 + 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/pySDC/projects/Resilience/paper_plots.py b/pySDC/projects/Resilience/paper_plots.py index da05295242..bb5e324480 100644 --- a/pySDC/projects/Resilience/paper_plots.py +++ b/pySDC/projects/Resilience/paper_plots.py @@ -445,7 +445,7 @@ def plot_fault_Lorenz(bit=0): # pragma: no cover color=colors[i], label=rf'${{{labels[i]}}}^{{{superscripts[i]}}}$', marker=markers[i], - markevery=120, + markevery=500, ) for idx in range(len(faults)): ax.axvline(faults[idx][0], color='black', label='Fault', ls=':') @@ -699,12 +699,12 @@ def make_plots_for_adaptivity_paper(): # pragma: no cover def make_plots_for_resilience_paper(): # pragma: no cover - # plot_Lorenz_solution() - # plot_fault_Lorenz(0) - # plot_fault_Lorenz(20) + plot_Lorenz_solution() + plot_fault_Lorenz(0) + plot_fault_Lorenz(20) # compare_recovery_rate_problems() - plot_recovery_rate(get_stats(run_Lorenz)) - plot_recovery_rate_per_acceptance_threshold(run_Lorenz) + # plot_recovery_rate(get_stats(run_Lorenz)) + # plot_recovery_rate_per_acceptance_threshold(run_Lorenz) plt.show() diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index e0f5cbe06c..4c28d2b971 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -283,7 +283,7 @@ def get_base_parameters(self, problem, num_procs=1): max_runtime = { 'run_vdp': 1000, - 'run_Lorenz': 60, + 'run_Lorenz': 500, 'run_Schroedinger': 150, 'run_quench': 150, 'run_AC': 150, @@ -453,9 +453,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 243380 + return 12350 elif key == 'e_global_post_run' and op == max: - return 4.68e-04 + return 1.3527453646133836e-07 super().get_reference_value(problem, key, op, num_procs) @@ -578,9 +578,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == 'run_Lorenz': if key == 'work_newton' and op == sum: - return 44635 + return 2989 elif key == 'e_global_post_run' and op == max: - return 2.82e-04 + return 5.636767497207984e-08 super().get_reference_value(problem, key, op, num_procs) @@ -705,7 +705,7 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 4758 + return 5092 elif key == 'e_global_post_run' and op == max: return 4.107116318152748e-06 @@ -808,9 +808,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 1872 + return 9200 elif key == 'e_global_post_run' and op == max: - return 2.2362043480939064e-05 + return 2.139863344829962e-05 super().get_reference_value(problem, key, op, num_procs) @@ -856,9 +856,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 243380 + return 12350 elif key == 'e_global_post_run' and op == max: - return 4.68e-04 + return 1.3527453646133836e-07 super().get_reference_value(problem, key, op, num_procs) @@ -977,9 +977,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 243380 + return 12350 elif key == 'e_global_post_run' and op == max: - return 4.68e-04 + return 1.3527453646133836e-07 super().get_reference_value(problem, key, op, num_procs) @@ -1093,9 +1093,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 983 + return 1025 elif key == 'e_global_post_run' and op == max: - return 3.944880392126038e-06 + return 4.266975256683736e-06 super().get_reference_value(problem, key, op, num_procs) @@ -1132,7 +1132,7 @@ def get_reference_value(self, problem, key, op, num_procs=1): if key == 'work_newton' and op == sum: return 917 elif key == 'e_global_post_run' and op == max: - return 1.0587702028885815e-05 + return 1.0874929465387595e-05 super().get_reference_value(problem, key, op, num_procs) @@ -1163,9 +1163,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == 'run_Lorenz': if key == 'work_newton' and op == sum: - return 1358 + return 1338 elif key == 'e_global_post_run' and op == max: - return 0.00010316526647002888 + return 0.0001013999955041811 super().get_reference_value(problem, key, op, num_procs) @@ -1246,9 +1246,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 1820 + return 6835 elif key == 'e_global_post_run' and op == max: - return 0.00013730538358736055 + return 7.049480537091313e-07 super().get_reference_value(problem, key, op, num_procs) @@ -1437,9 +1437,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 984 + return 3572 elif key == 'e_global_post_run' and op == max: - return 3.148061889390874e-06 + return 4.126039954144289e-09 super().get_reference_value(problem, key, op, num_procs) @@ -1530,7 +1530,7 @@ def get_reference_value(self, problem, key, op, num_procs=1): if key == 'work_newton' and op == sum: return 0 elif key == 'e_global_post_run' and op == max: - return 3.5085474063834e-05 + return 1.509206128957885e-07 super().get_reference_value(problem, key, op, num_procs) @@ -1607,9 +1607,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == 'run_Lorenz': if key == 'work_newton' and op == sum: - return 1369 + return 2989 elif key == 'e_global_post_run' and op == max: - return 9.364841517367495e-06 + return 5.636763944494305e-08 super().get_reference_value(problem, key, op, num_procs) @@ -1662,9 +1662,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 1369 + return 2989 elif key == 'e_global_post_run' and op == max: - return 9.364841517367495e-06 + return 5.636763944494305e-08 super().get_reference_value(problem, key, op, num_procs) @@ -1966,9 +1966,9 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 25404 + return 2123 elif key == 'e_global_post_run' and op == max: - return 4.34e-04 + return 7.931560830343187e-08 super().get_reference_value(problem, key, op, num_procs) diff --git a/pySDC/projects/Resilience/tests/test_strategies.py b/pySDC/projects/Resilience/tests/test_strategies.py index 7e9b0e71a2..ce1649e14a 100644 --- a/pySDC/projects/Resilience/tests/test_strategies.py +++ b/pySDC/projects/Resilience/tests/test_strategies.py @@ -81,6 +81,7 @@ def single_test(strategy_name, useMPI, num_procs): use_MPI=useMPI, custom_controller_params=controller_params, comm=comm, + Tend=1.0, ) # things we want to test From 6108e79b903b25e1b78425213e81321f49cd0589 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:07:37 +0200 Subject: [PATCH 06/12] Updated reference values --- pySDC/projects/Resilience/strategies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index c4bb3d6a13..a16391f2fa 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -1246,7 +1246,7 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 1456 + return 5467 elif key == 'e_global_post_run' and op == max: return 7.049480537091313e-07 @@ -1437,7 +1437,7 @@ def get_reference_value(self, problem, key, op, num_procs=1): """ if problem.__name__ == "run_Lorenz": if key == 'work_newton' and op == sum: - return 820 + return 2963 elif key == 'e_global_post_run' and op == max: return 4.126039954144289e-09 From 8a855b08b2ff23e87ae8bfaacea9383480f0d958 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:31:59 +0200 Subject: [PATCH 07/12] Fix tests --- pySDC/projects/Resilience/strategies.py | 2 +- pySDC/projects/Resilience/tests/test_fault_injection.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index a16391f2fa..a3f797d327 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -156,7 +156,7 @@ def get_random_params(self, problem, num_procs): if problem.__name__ == "run_quench": rnd_params['iteration'] = 5 elif problem.__name__ == 'run_Lorenz': - rnd_params['iteration'] = 5 + rnd_params['iteration'] = 3 return rnd_params @property diff --git a/pySDC/projects/Resilience/tests/test_fault_injection.py b/pySDC/projects/Resilience/tests/test_fault_injection.py index beaf8e7141..37c4709a59 100644 --- a/pySDC/projects/Resilience/tests/test_fault_injection.py +++ b/pySDC/projects/Resilience/tests/test_fault_injection.py @@ -225,7 +225,7 @@ def generate_stats(load=False): from pySDC.projects.Resilience.strategies import ( BaseStrategy, AdaptivityStrategy, - IterateStrategy, + kAdaptivityStrategy, HotRodStrategy, ) from pySDC.projects.Resilience.fault_stats import ( @@ -245,7 +245,7 @@ def generate_stats(load=False): strategies=[ BaseStrategy(), AdaptivityStrategy(), - IterateStrategy(), + kAdaptivityStrategy(), HotRodStrategy(), ], stats_path='data', From 3974cc5592eb9e4e81c3b1b7f0de9baaa2f16ee8 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:50:39 +0200 Subject: [PATCH 08/12] Fixes --- pySDC/projects/Resilience/fault_stats.py | 4 ---- pySDC/projects/Resilience/strategies.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pySDC/projects/Resilience/fault_stats.py b/pySDC/projects/Resilience/fault_stats.py index 5eee716822..cec47359d9 100644 --- a/pySDC/projects/Resilience/fault_stats.py +++ b/pySDC/projects/Resilience/fault_stats.py @@ -1696,10 +1696,6 @@ def main(): stats_analyser.get_recovered() mask = None - # mask = - stats_analyser.scrutinize(HotRodStrategy(), run=1, faults=False) - return None - # stats_analyser.compare_strategies() stats_analyser.plot_things_per_things( 'recovered', 'node', False, op=stats_analyser.rec_rate, mask=mask, args={'ylabel': 'recovery rate'} diff --git a/pySDC/projects/Resilience/strategies.py b/pySDC/projects/Resilience/strategies.py index a3f797d327..a16391f2fa 100644 --- a/pySDC/projects/Resilience/strategies.py +++ b/pySDC/projects/Resilience/strategies.py @@ -156,7 +156,7 @@ def get_random_params(self, problem, num_procs): if problem.__name__ == "run_quench": rnd_params['iteration'] = 5 elif problem.__name__ == 'run_Lorenz': - rnd_params['iteration'] = 3 + rnd_params['iteration'] = 5 return rnd_params @property From cdb15f3d3df1d7be062de73c164a638d0aae8929 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:52:21 +0200 Subject: [PATCH 09/12] Hopefully sped up tests somewhat --- .../Resilience/tests/test_fault_injection.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pySDC/projects/Resilience/tests/test_fault_injection.py b/pySDC/projects/Resilience/tests/test_fault_injection.py index 37c4709a59..e4972a7b2b 100644 --- a/pySDC/projects/Resilience/tests/test_fault_injection.py +++ b/pySDC/projects/Resilience/tests/test_fault_injection.py @@ -153,7 +153,7 @@ def test_fault_injection(): @pytest.mark.mpi4py @pytest.mark.slow -@pytest.mark.parametrize("numprocs", [5]) +@pytest.mark.parametrize("numprocs", [4]) def test_fault_stats(numprocs): """ Test generation of fault statistics and their recovery rates @@ -184,10 +184,10 @@ def test_fault_stats(numprocs): ), f"Expected {expected_max_combinations} possible combinations for faults in van der Pol problem, but got {stats.get_max_combinations()}!" recovered_reference = { - 'base': 1, - 'adaptivity': 2, + 'base': 0, + 'adaptivity': 1, 'iterate': 1, - 'Hot Rod': 2, + 'Hot Rod': 1, 'adaptivity_coll': 0, 'double_adaptivity': 0, } @@ -199,7 +199,7 @@ def test_fault_stats(numprocs): recovered_mask = stats.get_mask(strategy=strategy, key='recovered', op='eq', val=True) index = stats.get_index(mask=fixable_mask) - assert all(fixable_mask[:-1] == [False, True, False]), "Error in generating mask of fixable faults" + assert all(fixable_mask == [False, True]), "Error in generating mask of fixable faults" assert all(index == [1]), "Error when converting to index" combinations = np.array(stats.get_combination_counts(dat, keys=['bit'], mask=fixable_mask)) @@ -250,7 +250,7 @@ def generate_stats(load=False): ], stats_path='data', ) - stats.run_stats_generation(runs=4, step=2) + stats.run_stats_generation(runs=2, step=1) return stats From 3171b398c0fbac146f94e6eef5d40995ca3df4db Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:41:28 +0200 Subject: [PATCH 10/12] Split the tests --- .../Resilience/tests/test_fault_injection.py | 74 ++++++++----------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/pySDC/projects/Resilience/tests/test_fault_injection.py b/pySDC/projects/Resilience/tests/test_fault_injection.py index e4972a7b2b..ad1101578b 100644 --- a/pySDC/projects/Resilience/tests/test_fault_injection.py +++ b/pySDC/projects/Resilience/tests/test_fault_injection.py @@ -153,29 +153,29 @@ def test_fault_injection(): @pytest.mark.mpi4py @pytest.mark.slow -@pytest.mark.parametrize("numprocs", [4]) -def test_fault_stats(numprocs): +@pytest.mark.parametrize('strategy_name', ['base', 'adaptivity', 'kAdaptivity', 'HotRod']) +def test_fault_stats(strategy_name): """ Test generation of fault statistics and their recovery rates """ import numpy as np + from pySDC.projects.Resilience.strategies import ( + BaseStrategy, + AdaptivityStrategy, + kAdaptivityStrategy, + HotRodStrategy, + ) - # Set python path once - my_env = os.environ.copy() - my_env['PYTHONPATH'] = '../../..:.' - my_env['COVERAGE_PROCESS_START'] = 'pyproject.toml' - - cmd = f"mpirun -np {numprocs} python {__file__} --test-fault-stats".split() + strategies = { + 'base': BaseStrategy, + 'adaptivity': AdaptivityStrategy, + 'kAdaptivity': kAdaptivityStrategy, + 'HotRod': HotRodStrategy, + } - p = subprocess.Popen(cmd, env=my_env, cwd=".") + strategy = strategies[strategy_name]() - p.wait() - assert p.returncode == 0, 'ERROR: did not get return code 0, got %s with %2i processes' % ( - p.returncode, - numprocs, - ) - - stats = generate_stats(True) + stats = generate_stats(strategy, True) # test number of possible combinations for faults expected_max_combinations = 3840 @@ -193,26 +193,25 @@ def test_fault_stats(numprocs): } stats.get_recovered() - for strategy in stats.strategies: - dat = stats.load(strategy=strategy, faults=True) - fixable_mask = stats.get_fixable_faults_only(strategy) - recovered_mask = stats.get_mask(strategy=strategy, key='recovered', op='eq', val=True) - index = stats.get_index(mask=fixable_mask) + dat = stats.load(strategy=strategy, faults=True) + fixable_mask = stats.get_fixable_faults_only(strategy) + recovered_mask = stats.get_mask(strategy=strategy, key='recovered', op='eq', val=True) + index = stats.get_index(mask=fixable_mask) - assert all(fixable_mask == [False, True]), "Error in generating mask of fixable faults" - assert all(index == [1]), "Error when converting to index" + assert all(fixable_mask == [False, True]), "Error in generating mask of fixable faults" + assert all(index == [1]), "Error when converting to index" - combinations = np.array(stats.get_combination_counts(dat, keys=['bit'], mask=fixable_mask)) - assert all(combinations == [1.0, 1.0]), "Error when counting combinations" + combinations = np.array(stats.get_combination_counts(dat, keys=['bit'], mask=fixable_mask)) + assert all(combinations == [1.0, 1.0]), "Error when counting combinations" - recovered = len(dat['recovered'][recovered_mask]) - crashed = len(dat['error'][dat['error'] == np.inf]) # on some systems the last run crashes... - assert ( - recovered >= recovered_reference[strategy.name] - crashed - ), f'Expected {recovered_reference[strategy.name]} recovered faults, but got {recovered} recovered faults in {strategy.name} strategy!' + recovered = len(dat['recovered'][recovered_mask]) + crashed = len(dat['error'][dat['error'] == np.inf]) # on some systems the last run crashes... + assert ( + recovered >= recovered_reference[strategy.name] - crashed + ), f'Expected {recovered_reference[strategy.name]} recovered faults, but got {recovered} recovered faults in {strategy.name} strategy!' -def generate_stats(load=False): +def generate_stats(strategy, load=False): """ Generate stats to check the recovery rate @@ -222,12 +221,6 @@ def generate_stats(load=False): Returns: Object containing the stats """ - from pySDC.projects.Resilience.strategies import ( - BaseStrategy, - AdaptivityStrategy, - kAdaptivityStrategy, - HotRodStrategy, - ) from pySDC.projects.Resilience.fault_stats import ( FaultStats, ) @@ -242,12 +235,7 @@ def generate_stats(load=False): recovery_thresh=1.1, num_procs=1, mode='random', - strategies=[ - BaseStrategy(), - AdaptivityStrategy(), - kAdaptivityStrategy(), - HotRodStrategy(), - ], + strategies=[strategy], stats_path='data', ) stats.run_stats_generation(runs=2, step=1) From e4b6dcc72399fb7a8ff525a87bf5a8ae76fc3e8b Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:49:27 +0200 Subject: [PATCH 11/12] Removed some lengthy tests --- pySDC/projects/Resilience/tests/test_fault_injection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pySDC/projects/Resilience/tests/test_fault_injection.py b/pySDC/projects/Resilience/tests/test_fault_injection.py index ad1101578b..d23b5eccfc 100644 --- a/pySDC/projects/Resilience/tests/test_fault_injection.py +++ b/pySDC/projects/Resilience/tests/test_fault_injection.py @@ -153,7 +153,7 @@ def test_fault_injection(): @pytest.mark.mpi4py @pytest.mark.slow -@pytest.mark.parametrize('strategy_name', ['base', 'adaptivity', 'kAdaptivity', 'HotRod']) +@pytest.mark.parametrize('strategy_name', ['adaptivity']) def test_fault_stats(strategy_name): """ Test generation of fault statistics and their recovery rates From e526480d0c4491b9cc365e706e5b5f634a6e0b21 Mon Sep 17 00:00:00 2001 From: Thomas Baumann <39156931+brownbaerchen@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:32:41 +0200 Subject: [PATCH 12/12] Removed test that is apparently flaky based on hardware --- pySDC/projects/Resilience/tests/test_strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pySDC/projects/Resilience/tests/test_strategies.py b/pySDC/projects/Resilience/tests/test_strategies.py index ce1649e14a..b218fc162a 100644 --- a/pySDC/projects/Resilience/tests/test_strategies.py +++ b/pySDC/projects/Resilience/tests/test_strategies.py @@ -17,7 +17,7 @@ 'AdaptivityPolynomialError', 'kAdaptivity', ] -STRATEGY_NAMES_NONMPIONLY = ['adaptiveHR', 'HotRod'] +STRATEGY_NAMES_NONMPIONLY = ['HotRod'] STRATEGY_NAMES_MPIONLY = ['ARK'] LOGGER_LEVEL = 30