@@ -90,6 +90,59 @@ def __post_init__(self, assign_centroids):
9090 if assign_centroids :
9191 self .exposure .assign_centroids (self .hazard )
9292
def impact_to_aligned_df(
    self, impact: Impact, fillna: float = np.nan
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Create a dataframe from an impact and align it with the data.

    When aligning, two general cases might occur, which are not mutually
    exclusive:

    1. There are data points for which no impact was computed. This will always
       be treated as an impact of zero.
    2. There are impacts for which no data points exist. For these points, the
       input data will be filled with the value of
       :py:attr:`Input.missing_data_value`.

    This method performs the following steps:

    * Transform the impact into a dataframe using
      :py:attr:`impact_to_dataframe`.
    * Align the :py:attr:`data` with the impact dataframe, using
      :py:attr:`missing_data_value` as fill value for newly created entries.
    * Align the impact dataframe with the aligned data, using zeros as fill
      value.
    * In the aligned impact, set all values to zero where the data is NaN.
    * Fill remaining NaNs in data with ``fillna``.

    Parameters
    ----------
    impact : Impact
        The impact computed by the model. It is transformed into a dataframe
        by :py:attr:`impact_to_dataframe`.
    fillna : float, optional
        Value used to replace NaNs remaining in the aligned frames before they
        are returned. Defaults to NaN, which leaves them unchanged.

    Returns
    -------
    data_aligned : pd.DataFrame
        The data aligned to the impact dataframe
    impact_df_aligned : pd.DataFrame
        The impact transformed to a dataframe and aligned with the data

    Raises
    ------
    ValueError
        If the dataframe created from ``impact`` contains NaNs.
    """
    # Transform impact to dataframe
    impact_df = self.impact_to_dataframe(impact)
    if impact_df.isna().any(axis=None):
        raise ValueError("NaN values computed in impact!")

    # Align with different fill values. The fill value only applies to entries
    # newly created by the alignment; NaNs already present in the data are kept.
    data_aligned, _ = self.data.align(
        impact_df, axis=None, fill_value=self.missing_data_value
    )
    impact_df_aligned, _ = impact_df.align(
        data_aligned, join="right", axis=None, fill_value=0.0
    )

    # Set all impacts to zero for which data is NaN.
    # NOTE: Deliberately not in-place. If the labels already match, 'align' may
    # hand back a frame sharing its data with 'impact_df', and an in-place
    # update could then modify the frame returned by 'impact_to_dataframe'.
    impact_df_aligned = impact_df_aligned.where(data_aligned.notna(), 0.0)

    # NOTE: impact_df_aligned should not contain any NaNs at this point
    return data_aligned.fillna(fillna), impact_df_aligned.fillna(fillna)
145+
93146
94147@dataclass
95148class Output :
@@ -163,7 +216,6 @@ def plot_impf_variability(
163216 plot_impf_kws : Optional [dict ] = None ,
164217 plot_hist_kws : Optional [dict ] = None ,
165218 ):
166-
167219 """Plot impact function variability with parameter combinations of
168220 almost equal cost function values
169221
@@ -190,7 +242,7 @@ def plot_impf_variability(
190242 if p_space_df is None :
191243 # Assert that self.output has the p_space_to_dataframe() method,
192244 # which is defined for the BayesianOptimizerOutput class
193- if not hasattr (self .output ,"p_space_to_dataframe" ):
245+ if not hasattr (self .output , "p_space_to_dataframe" ):
194246 raise TypeError (
195247 "To derive the full impact function parameter space, "
196248 "plot_impf_variability() requires BayesianOptimizerOutput "
@@ -203,74 +255,93 @@ def plot_impf_variability(
203255 # and remove the dimension 'Cost Function'.
204256 params = p_space_df .columns .tolist ()
205257 try :
206- params .remove (' Cost Function' )
258+ params .remove (" Cost Function" )
207259 except ValueError :
208260 pass
209261
210262 # Retrieve parameters of impact functions with cost function values
211263 # within 'cost_func_diff' % of the best estimate
212264 params_within_range = p_space_df [params ]
213- plot_space_label = ' Parameter space'
265+ plot_space_label = " Parameter space"
214266 if cost_func_diff is not None :
215- max_cost_func_val = (p_space_df ['Cost Function' ].min ()*
216- (1 + cost_func_diff ))
267+ max_cost_func_val = p_space_df ["Cost Function" ].min () * (1 + cost_func_diff )
217268 params_within_range = p_space_df .loc [
218- p_space_df [' Cost Function' ] <= max_cost_func_val ,params
269+ p_space_df [" Cost Function" ] <= max_cost_func_val , params
219270 ]
220- plot_space_label = (f"within { int (cost_func_diff * 100 )} percent "
221- f"of best fit" )
271+ plot_space_label = (
272+ f"within { int (cost_func_diff * 100 )} percent " f"of best fit"
273+ )
222274
223275 # Set plot defaults
224- color = plot_impf_kws .pop (' color' , ' tab:blue' )
225- lw = plot_impf_kws .pop ('lw' , 2 )
226- zorder = plot_impf_kws .pop (' zorder' , 3 )
227- label = plot_impf_kws .pop (' label' , ' best fit' )
276+ color = plot_impf_kws .pop (" color" , " tab:blue" )
277+ lw = plot_impf_kws .pop ("lw" , 2 )
278+ zorder = plot_impf_kws .pop (" zorder" , 3 )
279+ label = plot_impf_kws .pop (" label" , " best fit" )
228280
229- #get number of impact functions and create a plot for each
281+ # get number of impact functions and create a plot for each
230282 n_impf = len (self .impf_set .get_func (haz_type = haz_type ))
231- axes = []
283+ axes = []
232284
233285 for impf_idx in range (n_impf ):
286+ _ , ax = plt .subplots ()
234287
235- _ ,ax = plt .subplots ()
236-
237- #Plot best-fit impact function
288+ # Plot best-fit impact function
238289 best_impf = self .impf_set .get_func (haz_type = haz_type )[impf_idx ]
239- ax .plot (best_impf .intensity ,best_impf .mdd * best_impf .paa * 100 ,
240- color = color ,lw = lw ,zorder = zorder ,label = label ,** plot_impf_kws )
241-
242- #Plot all impact functions within 'cost_func_diff' % of best estimate
290+ ax .plot (
291+ best_impf .intensity ,
292+ best_impf .mdd * best_impf .paa * 100 ,
293+ color = color ,
294+ lw = lw ,
295+ zorder = zorder ,
296+ label = label ,
297+ ** plot_impf_kws ,
298+ )
299+
300+ # Plot all impact functions within 'cost_func_diff' % of best estimate
243301 for row in range (params_within_range .shape [0 ]):
244302 label_temp = plot_space_label if row == 0 else None
245303
246- sel_params = params_within_range .iloc [row ,:].to_dict ()
304+ sel_params = params_within_range .iloc [row , :].to_dict ()
247305 temp_impf_set = self .input .impact_func_creator (** sel_params )
248306 temp_impf = temp_impf_set .get_func (haz_type = haz_type )[impf_idx ]
249307
250- ax .plot (temp_impf .intensity ,temp_impf .mdd * temp_impf .paa * 100 ,
251- color = 'grey' ,alpha = 0.4 ,label = label_temp )
308+ ax .plot (
309+ temp_impf .intensity ,
310+ temp_impf .mdd * temp_impf .paa * 100 ,
311+ color = "grey" ,
312+ alpha = 0.4 ,
313+ label = label_temp ,
314+ )
252315
253316 # Plot hazard intensity value distributions
254317 if plot_haz :
255318 haz_vals = self .input .hazard .intensity [
256319 :, self .input .exposure .gdf [f"centr_{ haz_type } " ]
257320 ]
258321
259- #Plot defaults
260- color_hist = plot_hist_kws .pop (' color' , ' tab:orange' )
261- alpha_hist = plot_hist_kws .pop (' alpha' , 0.3 )
322+ # Plot defaults
323+ color_hist = plot_hist_kws .pop (" color" , " tab:orange" )
324+ alpha_hist = plot_hist_kws .pop (" alpha" , 0.3 )
262325
263326 ax2 = ax .twinx ()
264- ax2 .hist (haz_vals .data ,bins = 40 ,color = color_hist ,
265- alpha = alpha_hist ,label = 'Hazard intensity\n occurence' )
266- ax2 .set (ylabel = 'Hazard intensity occurence (#Exposure points)' )
267- ax .axvline (x = haz_vals .max (),label = 'Maximum hazard value' ,
268- color = 'tab:orange' )
269- ax2 .legend (loc = 'lower right' )
270-
271- ax .set (xlabel = f"Intensity ({ self .input .hazard .units } )" ,
327+ ax2 .hist (
328+ haz_vals .data ,
329+ bins = 40 ,
330+ color = color_hist ,
331+ alpha = alpha_hist ,
332+ label = "Hazard intensity\n occurence" ,
333+ )
334+ ax2 .set (ylabel = "Hazard intensity occurence (#Exposure points)" )
335+ ax .axvline (
336+ x = haz_vals .max (), label = "Maximum hazard value" , color = "tab:orange"
337+ )
338+ ax2 .legend (loc = "lower right" )
339+
340+ ax .set (
341+ xlabel = f"Intensity ({ self .input .hazard .units } )" ,
272342 ylabel = "Mean Damage Ratio (MDR) in %" ,
273- xlim = (min (best_impf .intensity ),max (best_impf .intensity )))
343+ xlim = (min (best_impf .intensity ), max (best_impf .intensity )),
344+ )
274345 ax .legend ()
275346 axes .append (ax )
276347
@@ -279,13 +350,12 @@ def plot_impf_variability(
279350
280351 return ax
281352
282-
283353 def plot_at_event (
284354 self ,
285355 data_transf : Callable [[pd .DataFrame ], pd .DataFrame ] = lambda x : x ,
286356 ** plot_kwargs ,
287357 ):
288- """Create a bar plot comparing estimated model output and data per event
358+ """Create a bar plot comparing estimated model output and data per event.
289359
290360 Every row of the :py:attr:`Input.data` is considered an event.
291361 The data to be plotted can be transformed with a generic function
@@ -305,21 +375,23 @@ def plot_at_event(
305375 -------
306376 ax : matplotlib.axes.Axes
307377 The plot axis returned by ``DataFrame.plot.bar``
378+
379+ Note
380+ ----
381+ This plot does *not* include the ignored impact, see :py:attr:`Input.data`.
308382 """
309- data = pd .concat (
310- [
311- self .input .impact_to_dataframe (self .impact ).sum (axis = "columns" ),
312- self .input .data .sum (axis = "columns" ),
313- ],
383+ data , impact = self .input .impact_to_aligned_df (self .impact )
384+ values = pd .concat (
385+ [impact .sum (axis = "columns" ), data .sum (axis = "columns" )],
314386 axis = 1 ,
315387 ).rename (columns = {0 : "Model" , 1 : "Data" })
316388
317389 # Transform data before plotting
318- data = data_transf (data )
390+ values = data_transf (values )
319391
320392 # Now plot
321393 ylabel = plot_kwargs .pop ("ylabel" , self ._impact_label )
322- return data .plot .bar (ylabel = ylabel , ** plot_kwargs )
394+ return values .plot .bar (ylabel = ylabel , ** plot_kwargs )
323395
324396 def plot_at_region (
325397 self ,
@@ -346,21 +418,23 @@ def plot_at_region(
346418 -------
347419 ax : matplotlib.axes.Axes
348420 The plot axis returned by ``DataFrame.plot.bar``.
421+
422+ Note
423+ ----
424+ This plot does *not* include the ignored impact, see :py:attr:`Input.data`.
349425 """
350- data = pd .concat (
351- [
352- self .input .impact_to_dataframe (self .impact ).sum (axis = "index" ),
353- self .input .data .sum (axis = "index" ),
354- ],
426+ data , impact = self .input .impact_to_aligned_df (self .impact )
427+ values = pd .concat (
428+ [impact .sum (axis = "index" ), data .sum (axis = "index" )],
355429 axis = 1 ,
356430 ).rename (columns = {0 : "Model" , 1 : "Data" })
357431
358432 # Transform data before plotting
359- data = data_transf (data )
433+ values = data_transf (values )
360434
361435 # Now plot
362436 ylabel = plot_kwargs .pop ("ylabel" , self ._impact_label )
363- return data .plot .bar (ylabel = ylabel , ** plot_kwargs )
437+ return values .plot .bar (ylabel = ylabel , ** plot_kwargs )
364438
365439 def plot_event_region_heatmap (
366440 self ,
@@ -391,13 +465,12 @@ def plot_event_region_heatmap(
391465
392466 """
393467 # Data preparation
394- agg = self .input .impact_to_dataframe (self .impact )
395- data = (agg + 1 ) / (self .input .data + 1 )
396- data = data .transform (np .log10 )
397- data = data .where ((agg > 0 ) | (self .input .data > 0 ))
468+ data , impact = self .input .impact_to_aligned_df (self .impact )
469+ values = (impact + 1 ) / (data + 1 ) # Avoid division by zero
470+ values = values .transform (np .log10 )
398471
399472 # Transform data
400- data = data_transf (data )
473+ values = data_transf (values )
401474
402475 # Default plot settings
403476 annot = plot_kwargs .pop ("annot" , True )
@@ -411,7 +484,7 @@ def plot_event_region_heatmap(
411484 )
412485
413486 return sns .heatmap (
414- data ,
487+ values ,
415488 annot = annot ,
416489 vmin = vmin ,
417490 vmax = vmax ,
@@ -482,53 +555,6 @@ def _kwargs_to_impact_func_creator(self, *_, **kwargs) -> Dict[str, Any]:
482555 """
483556 return kwargs
484557
485- def _align_impact_with_data (
486- self , impact_df : pd .DataFrame
487- ) -> Tuple [pd .DataFrame , pd .DataFrame ]:
488- """Align the impact dataframe with the input data dataframe
489-
490- When aligning, two general cases might occur, which are not mutually exclusive:
491-
492- 1. There are data points for which no impact was computed. This will always be
493- treated as an impact of zero.
494- 2. There are impacts for which no data points exist. For these points, the input
495- data will be filled with the value of :py:attr:`Input.missing_data_value`.
496-
497- Parameters
498- ----------
499- impact_df : pandas.DataFrame
500- The impact computed by the model, transformed into a dataframe by
501- :py:attr:`Input.impact_to_dataframe`.
502-
503- Returns
504- -------
505- data_aligned : pandas.DataFrame
506- The :py:attr:`Input.data` aligned with the impact.
507- impact_df_aligned : pandas.DataFrame
508- The ``impact_df`` aligned with the data.
509-
510- Raises
511- ------
512- ValueError
513- If ``impact_df`` contains NaNs before aligning.
514- """
515- if impact_df .isna ().any (axis = None ):
516- raise ValueError ("NaN values computed in impact!" )
517-
518- data_aligned , impact_df_aligned = self .input .data .align (
519- impact_df , axis = None , fill_value = None
520- )
521-
522- # Add user-set value for non-aligned data
523- data_aligned [
524- impact_df_aligned .notna () & data_aligned .isna ()
525- ] = self .input .missing_data_value
526-
527- # Set all impacts to zero for which data is NaN
528- impact_df_aligned .where (data_aligned .notna (), inplace = True )
529-
530- return data_aligned .fillna (0 ), impact_df_aligned .fillna (0 )
531-
532558 def _opt_func (self , * args , ** kwargs ) -> Number :
533559 """The optimization function iterated by the optimizer
534560
@@ -557,8 +583,9 @@ def _opt_func(self, *args, **kwargs) -> Number:
557583 ).impact (** self .input .impact_calc_kwds )
558584
559585 # Transform to DataFrame, align, and compute target function
560- impact_df = self .input .impact_to_dataframe (impact )
561- data_aligned , impact_df_aligned = self ._align_impact_with_data (impact_df )
586+ data_aligned , impact_df_aligned = self .input .impact_to_aligned_df (
587+ impact , fillna = 0
588+ )
562589 return self ._target_func (data_aligned , impact_df_aligned )
563590
564591 @abstractmethod
0 commit comments