diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b3fbfa..6866892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ The changes listed in this file are categorised as follows: ### Added +* ([#87](https://github.com/iiasa/climate-assessment/issues/87)) Add `return_all_runs` + option to `post_process` to return individual climate model runs instead of + percentile aggregation * ([#68](https://github.com/iiasa/climate-assessment/pull/68)) Update supported dependencies and python versions * ([#58](https://github.com/iiasa/climate-assessment/pull/58)) Update requirements diff --git a/src/climate_assessment/cli.py b/src/climate_assessment/cli.py index 73d747b..9534967 100644 --- a/src/climate_assessment/cli.py +++ b/src/climate_assessment/cli.py @@ -178,6 +178,14 @@ type=bool, show_default=True, ) +return_all_runs_option = click.option( + "--return-all-runs/--no-return-all-runs", + help="Return individual climate model runs instead of percentile aggregation", + required=False, + default=False, + type=bool, + show_default=True, +) categorisation_option = click.option( "--categorisation/--no-categorisation", help="Add temperature category to meta data", @@ -381,6 +389,7 @@ def _harmonize_and_infill( @scenario_batch_size_option @infilling_database_option @save_raw_climate_output_option +@return_all_runs_option @postprocess_option @categorisation_option @report_completeness_option @@ -406,6 +415,7 @@ def workflow( scenario_batch_size, infilling_database, save_raw_climate_output, + return_all_runs, postprocess, categorisation, reporting_completeness_categorisation, @@ -444,6 +454,7 @@ def workflow( scenario_batch_size=scenario_batch_size, infilling_database=infilling_database, save_raw_climate_output=save_raw_climate_output, + return_all_runs=return_all_runs, postprocess=postprocess, categorisation=categorisation, reporting_completeness_categorisation=reporting_completeness_categorisation, @@ -479,6 +490,7 @@ def run_workflow( ) ), save_raw_climate_output=False, + 
return_all_runs=False, postprocess=True, categorisation=True, reporting_completeness_categorisation=False, @@ -618,6 +630,7 @@ def run_workflow( test_run=test_run, scenario_batch_size=scenario_batch_size, save_raw_output=save_raw_climate_output, + return_all_runs=return_all_runs, co2_and_non_co2_warming=co2_and_non_co2_warming, prefix=prefix, ) @@ -886,6 +899,7 @@ def infill( @gwp_def_false_option @nonco2_warming_option @save_raw_climate_output_option +@return_all_runs_option @save_csv_combined_output_option def clim_cli( harmonizedinfilledemissions, @@ -905,6 +919,7 @@ def clim_cli( gwp, co2_and_non_co2_warming, save_raw_climate_output, + return_all_runs, save_csv_combined_output, ): """ @@ -950,6 +965,7 @@ def clim_cli( test_run=test_run, scenario_batch_size=scenario_batch_size, save_raw_output=save_raw_climate_output, + return_all_runs=return_all_runs, co2_and_non_co2_warming=co2_and_non_co2_warming, prefix=prefix, ) diff --git a/src/climate_assessment/climate/__init__.py b/src/climate_assessment/climate/__init__.py index 47b358b..23babdc 100644 --- a/src/climate_assessment/climate/__init__.py +++ b/src/climate_assessment/climate/__init__.py @@ -38,6 +38,7 @@ def climate_assessment( test_run=False, scenario_batch_size=20, save_raw_output=False, + return_all_runs=False, probabilistic_file=DEFAULT_MAGICC_DRAWNSET, magicc_extra_config=None, fair_extra_config=None, @@ -197,6 +198,7 @@ def save_pyam_style_meta_table(outpath, meta_table): historical_warming_reference_period=historical_warming_reference_period, historical_warming_evaluation_period=historical_warming_evaluation_period, save_raw_output=save_raw_output, + return_all_runs=return_all_runs, outdir=outdir, test_run=test_run, co2_and_non_co2_warming=co2_and_non_co2_warming, @@ -280,6 +282,7 @@ def run_and_post_process( outdir, test_run, save_raw_output, + return_all_runs, co2_and_non_co2_warming, ): """ @@ -317,6 +320,10 @@ def run_and_post_process( save_raw_output: bool If True, save all the raw climate model 
output for later analysis. + return_all_runs : bool + If True, return individual climate model runs instead of + percentile-aggregated results. See :func:`post_process` for details. + co2_and_non_co2_warming : bool Include assessment of CO2 and non-CO2 warming? @@ -405,6 +412,7 @@ def run_and_post_process( outdir, test_run=test_run, save_raw_output=save_raw_output, + return_all_runs=return_all_runs, co2_and_non_co2_warming=co2_and_non_co2_warming, historical_warming=historical_warming, historical_warming_reference_period=historical_warming_reference_period, diff --git a/src/climate_assessment/climate/post_process.py b/src/climate_assessment/climate/post_process.py index b46a114..f7183bc 100644 --- a/src/climate_assessment/climate/post_process.py +++ b/src/climate_assessment/climate/post_process.py @@ -213,6 +213,7 @@ def post_process( outdir, test_run=False, save_raw_output=False, + return_all_runs=False, co2_and_non_co2_warming=False, # for exceedance probability calculations temp_thresholds=(1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0), @@ -237,6 +238,58 @@ def post_process( historical_warming_reference_period="1850-1900", historical_warming_evaluation_period="1995-2014", ): + """ + Post-process climate model output into assessment variables. + + Parameters + ---------- + res : :obj:`scmdata.ScmRun` + Raw climate model output from ``openscm-runner`` + + outdir : str + Directory for saving output files + + test_run : bool + If True, skip strict historical temperature matching checks + + save_raw_output : bool + If True, save raw climate model output (every ensemble member) to disk + + return_all_runs : bool + If True, return individual climate model runs with run IDs encoded in + the model name (``model|run_N``) and climate model in the variable name + (``variable|climate_model``), instead of percentile-aggregated results. 
+ Returns a 3-tuple of ``(res, res_all_runs, meta_table)`` where + ``res_all_runs`` is an :obj:`scmdata.ScmRun` with per-run timeseries and + ``meta_table`` is a :class:`pandas.DataFrame` with model/scenario pairs. + + co2_and_non_co2_warming : bool + Include assessment of CO2 and non-CO2 warming + + temp_thresholds : tuple of float + Temperature thresholds for exceedance probability calculation + + peak_percentiles : tuple of float + Percentiles for peak warming statistics + + percentiles : tuple of float + Percentiles for timeseries aggregation + + historical_warming : float + Historical warming to match climate model output to + + historical_warming_reference_period : str + Reference period for historical warming (e.g. "1850-1900") + + historical_warming_evaluation_period : str + Evaluation period for historical warming (e.g. "1995-2014") + + Returns + ------- + tuple + If ``return_all_runs`` is False (default): ``(res, res_percentiles, meta_table)`` + If ``return_all_runs`` is True: ``(res, res_all_runs, meta_table)`` + """ LOGGER.info("Beginning climate post-processing") LOGGER.info("Removing unknown units and keeping only World data") res = res.filter(unit="unknown", keep=False).filter(region="World") @@ -325,7 +378,7 @@ def _rename_vars(v): ) res = res.append(exceedance_probability_timeseries) - year_filter = range(1995, 2101) + year_filter = range(1950, 2101) LOGGER.info("Keeping only data from %s", year_filter) res = res.filter(year=year_filter) @@ -400,6 +453,32 @@ def _convert_to_standard_name_and_unit(vdf): res = res.groupby("variable").map(_convert_to_standard_name_and_unit) + if return_all_runs: + LOGGER.info("Returning all individual runs (skipping percentile aggregation)") + res_df = res.timeseries().reset_index() + + LOGGER.info( + "Encoding run_id in model name and adding climate model to variable" + ) + res_df["model"] = ( + res_df["model"].astype(str) + "|run_" + res_df["run_id"].astype(str) + ) + res_df["variable"] = ( + 
res_df["variable"].astype(str) + "|" + res_df["climate_model"].astype(str) + ) + res_all_runs = scmdata.ScmRun( + res_df.drop(["climate_model", "run_id"], axis="columns") + ) + + # Minimal meta table: one row per unique (run-suffixed model, scenario) pair, no other meta columns + unique_combos = res_df[["model", "scenario"]].drop_duplicates() + meta_table = pd.DataFrame( + {"model": unique_combos["model"], "scenario": unique_combos["scenario"]} + ) + + LOGGER.info("Exiting post-processing (all runs mode)") + return res, res_all_runs, meta_table + LOGGER.info("Calculating percentiles") res_percentiles = res.quantiles_over( "run_id", np.array(percentiles) / 100 diff --git a/tests/unit/test_climate.py b/tests/unit/test_climate.py index cd2d1c7..2115791 100644 --- a/tests/unit/test_climate.py +++ b/tests/unit/test_climate.py @@ -1,8 +1,15 @@ +import datetime as dt import re +import numpy as np +import pandas as pd import pytest +import scmdata -from climate_assessment.climate.post_process import check_hist_warming_period +from climate_assessment.climate.post_process import ( + check_hist_warming_period, + post_process, +) @pytest.mark.parametrize( @@ -34,3 +41,76 @@ def test_check_hist_warming_period_malformed(inp): ) with pytest.raises(ValueError, match=error_msg): check_hist_warming_period(inp) + + +def _build_synthetic_climate_output(run_ids=(0, 1)): + """Build minimal synthetic ScmRun data that passes through post_process.""" + years = list(range(1850, 2101)) + n_years = len(years) + time_cols = [dt.datetime(y, 1, 1) for y in years] + + variables = [ + ("Surface Air Temperature Change", "K"), + ("Surface Air Ocean Blended Temperature Change", "K"), + ("Effective Radiative Forcing|Greenhouse Gases", "W/m^2"), + ("Effective Radiative Forcing|Anthropogenic", "W/m^2"), + ("Effective Radiative Forcing|CO2", "W/m^2"), + ("Effective Radiative Forcing", "W/m^2"), + ] + + rows = [] + for run_id in run_ids: + for var, unit in variables: + row = { + "model": "test_model", + "scenario": 
"test_scenario", + "variable": var, + "unit": unit, + "region": "World", + "climate_model": "FaIRv1.6.2", + "run_id": run_id, + } + if "Temperature" in var: + data = np.linspace(0, 1.5, n_years) + run_id * 0.1 + else: + data = np.linspace(0, 3.0, n_years) + run_id * 0.05 + + for i, t in enumerate(time_cols): + row[t] = data[i] + + rows.append(row) + + return scmdata.ScmRun(pd.DataFrame(rows)) + + +def test_post_process_return_all_runs(tmp_path): + res_input = _build_synthetic_climate_output(run_ids=(0, 1)) + + result = post_process( + res_input, + outdir=str(tmp_path), + test_run=True, + return_all_runs=True, + ) + + assert isinstance(result, tuple) + assert len(result) == 3 + + res, res_all_runs, meta_table = result + + # res_all_runs should be an ScmRun + assert isinstance(res_all_runs, scmdata.ScmRun) + + # model names should encode run_id + models = res_all_runs.get_unique_meta("model") + assert all("|run_" in m for m in models) + + # variable names should encode climate model + variables = res_all_runs.get_unique_meta("variable") + assert all("|FaIRv1.6.2" in v for v in variables) + + # meta_table structure + assert isinstance(meta_table, pd.DataFrame) + assert "model" in meta_table.columns + assert "scenario" in meta_table.columns + assert not meta_table.duplicated(subset=["model", "scenario"]).any()