diff --git a/docs/source/whatsnew/1.0.14.rst b/docs/source/whatsnew/1.0.14.rst
index 566eed95..b79c3fb7 100644
--- a/docs/source/whatsnew/1.0.14.rst
+++ b/docs/source/whatsnew/1.0.14.rst
@@ -11,6 +11,7 @@ Fixed
* Added table sorting in html reports.
* Added total metrics sorting in html reports.
* Updating some dependency requirements for tests.
+* Added Reliability Diagram to reports for probabilistic forecasts.
Contributors
~~~~~~~~~~~~
diff --git a/solarforecastarbiter/conftest.py b/solarforecastarbiter/conftest.py
index cc547c4e..00813233 100644
--- a/solarforecastarbiter/conftest.py
+++ b/solarforecastarbiter/conftest.py
@@ -2930,10 +2930,9 @@ def ser(interval_length):
@pytest.fixture
-def report_with_raw_xy(raw_report_dict_with_prob, raw_report_xy):
- raw_report_dict_with_prob['raw_report'] = raw_report_xy(True)
- raw_report_dict_with_prob['status'] = 'complete'
- report = datamodel.Report.from_dict(raw_report_dict_with_prob)
+def report_with_raw_xy(report_dict, raw_report_xy):
+ report_dict['raw_report'] = raw_report_xy(True)
+ report = datamodel.Report.from_dict(report_dict)
return report
diff --git a/solarforecastarbiter/reports/figures/plotly_figures.py b/solarforecastarbiter/reports/figures/plotly_figures.py
index 476e42de..35de1497 100644
--- a/solarforecastarbiter/reports/figures/plotly_figures.py
+++ b/solarforecastarbiter/reports/figures/plotly_figures.py
@@ -13,6 +13,7 @@
import pandas as pd
from plotly import __version__ as plotly_version
+from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
from matplotlib import cm
@@ -29,6 +30,7 @@
'#98df8a', '#ff9896', '#c5b0d5', '#c49c94', '#f7b6d2', '#c7c7c7',
'#dbdb8d', '#9edae5']
PALETTE = (D3_PALETTE[::2] + D3_PALETTE[1::2])
+MARKER_TYPES = ['circle', 'diamond', 'triangle-up', 'triangle-down']
def gen_grays(num_colors):
@@ -1169,8 +1171,163 @@ def probabilistic_timeseries():
raise NotImplementedError
-def reliability_diagram():
- raise NotImplementedError
+def reliability_diagram(report):
+    """Create the reliability diagram for use with reports with probabilistic
+ forecasts.
+
+ Parameters
+ ----------
+ report: :py:class:`solarforecastarbiter.datamodel.Report`
+
+ Returns
+ -------
+ reliability_diag_spec: str
+ String json specification of the reliability diagram plot. None if no
+ probabilistic 'y' forecast values are available.
+ reliability_histogram_spec : str
+ String json specification of the histogram corresponding to the
+ reliability. None if no probabilistic 'y' forecast values are
+ available.
+ """ # NOQA
+ value_df, meta_df = construct_timeseries_dataframe(report)
+
+ if value_df.empty:
+ # No forecast data, don't plot anything
+ return (None, None)
+
+ fxobs = report.raw_report.processed_forecasts_observations
+ units = fxobs[0].original.forecast.units
+    units = units.replace('^2', '<sup>2</sup>')
+
+    # Get all probabilistic forecasts with % constant values
+ _cv_split = 'Prob(f <= x) =' # HACK: need PFCV to have reference to PF
+ pfxobs = []
+ pfx_groups = []
+ for fxob in fxobs:
+ # only process probabilistic forecasts
+ if (isinstance(fxob.original.forecast, (
+ datamodel.ProbabilisticForecastConstantValue)) and
+ fxob.original.forecast.axis == 'y'):
+ pfxobs.append(fxob)
+ parent_name = fxob.name.split(_cv_split)[0].strip()
+ if parent_name not in pfx_groups:
+ pfx_groups.append(parent_name)
+
+ # Short-circuit if no probabilistic forecasts found
+ if len(pfxobs) == 0:
+ return (None, None)
+
+ # Calculate ratio of correct forecasts
+ rd_df = pd.DataFrame(columns=['group', 'constant_value', 'ratio'])
+ prev_group = None
+ prev_pos_count = 0
+ for pfx in pfxobs:
+ group = pfx.original.forecast.name
+ if group != prev_group:
+ prev_pos_count = 0
+ cv = pfx.original.forecast.constant_value
+ fxcv = f'{group} Prob(f <= x) = {cv}%'
+ pindex = meta_df[meta_df['forecast_name'] ==
+ fxcv]['pair_index'].values[0]
+ pi_df = value_df[value_df['pair_index'] == pindex]
+ positive_count = (pi_df['observation_values'] <
+ pi_df['forecast_values']).sum()
+ ratio = positive_count / len(pi_df)
+ lcount = positive_count - prev_pos_count
+ rd_df = rd_df.append({'group': group,
+ 'constant_value': cv,
+ 'ratio': ratio,
+ 'lcount': lcount}, ignore_index=True)
+ prev_pos_count = positive_count
+ prev_group = group
+
+ # Reliability Diagram
+ palette = cycle(PALETTE)
+ markers = cycle(MARKER_TYPES)
+
+ fig_rd = go.Figure() # Reliability Diagram
+ n_rows = int(np.ceil(len(pfx_groups)/2))
+ fig_hg = make_subplots(rows=n_rows, cols=2,
+ row_heights=[0.5]*n_rows,
+ horizontal_spacing=0.1,
+ vertical_spacing=0.2) # Bar Charts
+
+ irow = 1
+ icol = 1
+ for group in rd_df.group.unique():
+ color = next(palette)
+ mark = next(markers)
+ group_df = rd_df[rd_df['group'] == group]
+ # Reliability diagram
+ fig_rd.add_trace(go.Scatter(x=group_df.constant_value/100.0,
+ y=group_df.ratio, mode='lines+markers',
+ name=group,
+ marker=dict(symbol=mark,
+ line=dict(color='black',
+ width=1)),
+ line=dict(color=color, width=3)))
+
+ # Bar chart
+ widths = group_df.constant_value.diff().values
+ widths[0] = group_df.constant_value.values[0] - 0.
+ fig_hg.add_trace(go.Bar(x=(group_df.constant_value-widths)/100.0,
+ y=group_df.lcount,
+ width=widths/100.0,
+ text=group_df.constant_value.values,
+                                hovertemplate="P <= %{text:.1f}%" +
+                                              "<br>Count=%{y}",
+ marker=dict(color=color),
+ name=group,
+ offset=0),
+ irow, icol)
+
+ if icol == 2:
+ irow += 1
+ icol = 1
+ else:
+ icol += 1
+
+ # Reliability Diagram
+ fig_rd.add_trace(go.Scatter(x=[0.0, 1.0], y=[0.0, 1.0],
+ name='Perfect Calibration',
+ line=dict(color='grey',
+ width=2,
+ dash='dash')))
+
+ fig_rd.update_xaxes(title_text="Predicted Probability (f <= x)",
+ showgrid=True,
+ gridwidth=1, gridcolor='#CCC', showline=True,
+ linewidth=1, linecolor='black', ticks='outside',
+ tickformat=".1%")
+ fig_rd.update_yaxes(title_text="Ratio of Positive Predictions",
+ showgrid=True,
+ gridwidth=1, gridcolor='#CCC', showline=True,
+ linewidth=1, linecolor='black', ticks='outside',
+ tickformat=".1f")
+ fig_rd.update_layout(title="Reliability Diagram",
+ legend=dict(font=dict(size=12),
+ x=0.1, y=0.9),
+ plot_bgcolor=PLOT_BGCOLOR,
+ font=dict(size=14),
+ width=700,
+ height=700)
+
+ # Bar Charts
+ fig_hg.update_xaxes(title_text="Predicted Probability Bin", showgrid=True,
+ gridwidth=1, gridcolor='#CCC', showline=True,
+ linewidth=1, linecolor='black', ticks='outside',
+ tickformat=".1%")
+ fig_hg.update_yaxes(title_text="Count", showgrid=True,
+ gridwidth=1, gridcolor='#CCC', showline=True,
+ linewidth=1, linecolor='black', ticks='outside')
+ fig_hg.update_layout(title="Count of Nearest Correct Prediction",
+ plot_bgcolor=PLOT_BGCOLOR,
+ legend=dict(font=dict(size=10),
+ orientation='v'),
+ font=dict(size=12),
+ width=900)
+
+ return (fig_rd.to_json(), fig_hg.to_json())
def rank_histogram():
diff --git a/solarforecastarbiter/reports/figures/tests/test_plotly_figures.py b/solarforecastarbiter/reports/figures/tests/test_plotly_figures.py
index 41cac30f..c3ba98ae 100644
--- a/solarforecastarbiter/reports/figures/tests/test_plotly_figures.py
+++ b/solarforecastarbiter/reports/figures/tests/test_plotly_figures.py
@@ -673,3 +673,17 @@ def make_row(name, abbrev, value):
# assert hover text (original name) matches x label order
assert (figure.data[0]['text'] == np.array([
'CA GHI', "DA GHi01", "DA GHi02", "DA GHi03", "EA GHI"])).all()
+
+
+def test_reliability_diagram_no_cdf_cv(report_with_raw):
+ rd_spec, hg_spec = figures.reliability_diagram(
+ report_with_raw)
+ assert rd_spec is None
+ assert hg_spec is None
+
+
+def test_reliability_diagram(report_with_raw_xy):
+ rd_spec, hg_spec = figures.reliability_diagram(
+ report_with_raw_xy)
+ assert isinstance(rd_spec, str)
+ assert isinstance(hg_spec, str)
diff --git a/solarforecastarbiter/reports/template.py b/solarforecastarbiter/reports/template.py
index b264b520..ae144b85 100644
--- a/solarforecastarbiter/reports/template.py
+++ b/solarforecastarbiter/reports/template.py
@@ -16,7 +16,7 @@
from plotly import __version__ as plotly_version
-from solarforecastarbiter import datamodel
+from solarforecastarbiter import datamodel, utils
from solarforecastarbiter.reports.figures import plotly_figures
@@ -231,6 +231,11 @@ def _get_render_kwargs(report, dash_url, with_timeseries):
kwargs['includes_distribution'] = timeseries_specs[3]
+ if utils.any_probabilistic_forecast(report):
+ rd_specs = plotly_figures.reliability_diagram(report)
+ kwargs['reliability_diag_spec'] = rd_specs[0]
+ kwargs['reliability_cnt_spec'] = rd_specs[1]
+
return kwargs
diff --git a/solarforecastarbiter/reports/templates/html/body.html b/solarforecastarbiter/reports/templates/html/body.html
index 79a8be86..3c6aeaf0 100644
--- a/solarforecastarbiter/reports/templates/html/body.html
+++ b/solarforecastarbiter/reports/templates/html/body.html
@@ -322,7 +322,7 @@
 A table of summary statistics for the resampled observations and deterministic forecasts over the entire study period is available below. Statistics for other
@@ -376,9 +376,32 @@
+ The Reliability Diagram displays the ratio (relative frequency) of positive predictions for each constant value that is provided for the same group of a probabilistic forecast.
+ The dashed line shows a perfectly calibrated forecast where each constant value is correct at a ratio equal to that prediction interval.
+
+ The count of the samples in the bar charts below is the number of correct predictions for that constant value prediction interval minus the nearest left-hand bin.
+ This provides an indication of the number of correct predictions that are closest to that interval.
+