diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index bd11485..847a786 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -3,15 +3,15 @@
 description: |
   Evaluate skill of COSMO-E emulator (M-1 forecaster).

 dates:
-  - 2020-02-03T00:00 # Storm Petra
-  - 2020-02-07T00:00 # Storm Sabine
-  - 2020-10-01T00:00 # Storm Brigitte
+  start: 2020-01-04T00:00
+  end: 2020-01-05T00:00
+  frequency: 6h

 runs:
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/120/6
+      steps: 0/48/6
       config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference.git@main
@@ -21,7 +21,7 @@ baselines:
     baseline_id: COSMO-E
     label: COSMO-E
     root: /store_new/mch/msopr/ml/COSMO-E
-    steps: 0/120/6
+    steps: 0/48/6

 analysis:
   label: COSMO KENDA
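For reference, REFTIMES in workflow/Snakefile is presumably built by expanding this new dates block; a minimal sketch, assuming an inclusive end and an "Nh" frequency string (expand_dates is a hypothetical helper, not the workflow's actual one):

# Hypothetical expansion of dates.start/end/frequency into reference times.
from datetime import datetime, timedelta

def expand_dates(start: str, end: str, frequency: str) -> list[datetime]:
    step = timedelta(hours=int(frequency.rstrip("h")))  # "6h" -> 6 hours
    t, stop, out = datetime.fromisoformat(start), datetime.fromisoformat(end), []
    while t <= stop:  # end assumed inclusive
        out.append(t)
        t += step
    return out

# expand_dates("2020-01-04T00:00", "2020-01-05T00:00", "6h") -> five reference
# times: Jan 4 at 00/06/12/18 UTC and Jan 5 at 00 UTC.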
diff --git a/resources/mec/namelist.jinja2 b/resources/mec/namelist.jinja2
new file mode 100644
index 0000000..569d82b
--- /dev/null
+++ b/resources/mec/namelist.jinja2
@@ -0,0 +1,76 @@
+!==============================================================================
+! namelist template for MEC
+!==============================================================================
+
+ !===================
+ ! general parameters
+ !===================
+ &run
+  method       = 'GMESTAT'        ! Model Equivalent Calculator
+  model        = 'ML'             ! forecast model. One of "COSMO" "ICON" "ML"
+  input        = './input_mod'    ! input data path
+  data         = '/oprusers/osm/opr.emme/data/' ! data path for auxiliary data
+  obsinput     = './input_obs'    ! observation input data path
+  output       = '.'              ! output data to working directory
+  time_ana     = {{ init_time }}00 ! analysis date YYYYMMDDHHMMSS
+  read_fields  = 'ps u t v q geof t2m td2m u_10m v_10m'
+  grib_edition = 2
+  grib_library = 2                ! GRIB library used: 1=GRIBEX, 2=GRIB2-API
+  cosmo_refatm = 2                ! reference atmosphere to be used for COSMO: 1 or 2
+  fc_hours     = 0                ! default is 3 h; must be 0 to verify the +0 h lead time
+  nproc1       = 1
+  nproc2       = 1
+ /
+
+ !===============================
+ ! observation related parameters
+ !===============================
+ &observations
+  !---------------------------------------------------
+  ! read from CDFIN files (if not set use mon/cof/ekf)
+  !---------------------------------------------------
+  read_cdfin  = F                 ! (F): don't read COSMO CDFIN files; get obs from ekf
+  vint_lin_t  = T                 ! linear vertical interpolation for temperature
+  vint_lin_z  = T                 ! linear vertical interpolation for geopotential
+  vint_lin_uv = T                 ! linear vertical interpolation for wind
+  ptop_lapse  = 850.
+  pbot_lapse  = 950.
+! int_nn      = T                 ! horizontal interpolation: nearest neighbor
+ /
+
+ !====================
+ ! Ensemble parameters
+ !====================
+ &ENKF
+  k_enkf  = 0                     ! ensemble size (0 for det. run)
+  det_run = 1                     ! set to 1 for deterministic run, 0 for ensemble
+ /
+
+ !================================
+ ! Verification related parameters
+ !================================
+ &veri_obs
+  obstypes    = "SYNOP"           ! "SYNOP TEMP"
+  fc_times    = {{ leadtimes }}   ! forecast lead times at reference (hhmm): 0000,1200,2400,...
+  prefix_in   = 'ekf'             ! prefix for input files: ekf or mon
+  prefix_out  = 'ver'
+  rm_old      = 2                 ! overwrite entries in verification file?
+  fc_file     = 'fc__FCR_TIME_00' ! template for forecast file name
+  time_range  = 1
+  ekf_concat  = F
+  ref_runtype = 'any'             ! accept any runtype for the reference state
+ /
+
+ &report
+  time_b = -0029                  ! (hhmm, inclusive)
+  time_e =  0030                  ! (hhmm, exclusive)
+ /
+
+ &cosmo_obs
+  lcd187             = .true.     ! use ground-based wind lidar obs
+  verification_start = -29        ! (min, inclusive)
+  verification_end   =  30        ! (min, inclusive)
+ /
+
+ &synop_obs
+  version = 1
+ /
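The two placeholders in this template, {{ init_time }} and {{ leadtimes }}, are filled by workflow/scripts/generate_mec_namelist.py (end of this diff); a minimal sketch of the same render:

# Render the MEC namelist template the way the generator script does.
import jinja2

env = jinja2.Environment(loader=jinja2.FileSystemLoader("resources/mec"))
template = env.get_template("namelist.jinja2")
context = {
    "init_time": "202001040000",  # -> time_ana = 20200104000000
    "leadtimes": ",".join(f"{h:02d}00" for h in range(0, 49, 6)),  # -> fc_times
}
print(template.render(**context))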
+ """ + init = wc.init_time + lt = get_leadtime(wc) # expects something like "48h" + lead_h = int(str(lt).rstrip("h")) + freq_cfg = RUN_CONFIGS[wc.run_id].get("frequency", "12h") + freq_h = int(str(freq_cfg).rstrip("h")) + base = datetime.strptime(init, "%Y%m%d%H%M") + times = [] + for h in range(lead_h, -1, -freq_h): + t = base - timedelta(hours=h) + times.append(t.strftime("%Y%m%d%H%M")) + return times + + +rule collect_mec_input: + input: + inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir, + output: + run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"), + obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"), + mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"), + params: + steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], + init_list_str=lambda wc: " ".join(get_init_times(wc)), + run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"), + log: + OUT_ROOT + / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log", + shell: + """ + ( + set -euo pipefail + echo "...time at start of collect_mec_input: $(date)" + + # create the input_obs and input_mod dirs + mkdir -p {output.run} {output.obs} {output.mod} + + # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths + init="{wildcards.init_time}" + ym="${{init:0:6}}" + ymdh="${{init:0:10}}" + echo "init time: ${{init}}, ym: ${{ym}}" + + # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files + cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/ekfSYNOP.nc + cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc + + # For each source init (src_init) produce one output file named fc_ + for src_init in {params.init_list_str}; do + src_dir="{params.run_root}/$src_init/grib" + out_file="{output.mod}/fc_$src_init" + echo "creating $out_file from $src_dir" + # create/truncate out_file + : > "$out_file" + # only concat if matching files exist + if compgen -G "$src_dir/20*.grib" > /dev/null; then + cat "$src_dir"/20*.grib >> "$out_file" + else + echo "WARNING: no grib files found in $src_dir" >&2 + fi + done + + ls -l {output.mod} {output.obs} + echo "...time at end of collect_mec_input: $(date)" + ) > {log} 2>&1 + """ + + +rule generate_mec_namelist: + localrule: True + input: + script="workflow/scripts/generate_mec_namelist.py", + template="resources/mec/namelist.jinja2", + output: + namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist", + params: + steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], + shell: + """ + uv run {input.script} \ + --steps {params.steps} \ + --init_time {wildcards.init_time} \ + --template {input.template} \ + --namelist {output.namelist} + """ + + +rule run_mec: + input: + namelist=rules.generate_mec_namelist.output.namelist, + run_dir=directory(rules.collect_mec_input.output.run), + output: + fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc", + resources: + cpus_per_task=1, + runtime="1h", + log: + OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_run_mec.log", + shell: + """ + ( + set -euo pipefail + echo "...time at start of run_mec: $(date)" + + # Run MEC inside sarus container + # Note: pull command currently needed only once to download the container + # sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main + abs_run_dir=$(realpath {input.run_dir}) + sarus run 
diff --git a/workflow/scripts/generate_mec_namelist.py b/workflow/scripts/generate_mec_namelist.py
new file mode 100644
index 0000000..44fab72
--- /dev/null
+++ b/workflow/scripts/generate_mec_namelist.py
@@ -0,0 +1,68 @@
+import logging
+from argparse import ArgumentParser
+from datetime import datetime
+from pathlib import Path
+
+import jinja2
+
+LOG = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+
+def _parse_steps(steps: str) -> list[int]:
+    # check that steps is in the format "start/stop/step"
+    if len(steps.split("/")) != 3:
+        raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
+    start, end, step = map(int, steps.split("/"))
+    return list(range(start, end + 1, step))
+
+
+def main(args):
+    # Lead hours from --steps (stop inclusive); format as hhmm strings
+    # like 0000,0600,1200,...,12000.
+    lead_hours = args.steps
+    leadtimes = ",".join(f"{h:02d}00" for h in lead_hours)
+
+    # Render the template with init_time and the computed leadtimes
+    context = {"init_time": f"{args.init_time:%Y%m%d%H%M}", "leadtimes": leadtimes}
+    template_path = Path(args.template)
+    env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
+    template = env.get_template(template_path.name)
+    namelist = template.render(**context)
+
+    out_path = Path(args.namelist)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as f:
+        f.write(namelist)
+    LOG.info(f"MEC namelist written to: {out_path}")
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+
+    parser.add_argument(
+        "--steps",
+        type=_parse_steps,
+        default="0/120/6",
+        help="Forecast steps as 'start/stop/step' in hours (stop inclusive).",
+    )
+
+    parser.add_argument(
+        "--init_time",
+        type=lambda s: datetime.strptime(s, "%Y%m%d%H%M"),
+        default="202010010000",
+        help="Forecast initialization (analysis) time in YYYYMMDDHHMM format.",
+    )
+
+    parser.add_argument(
+        "--template",
+        type=str,
+        help="Path to the Jinja2 namelist template.",
+    )
+
+    parser.add_argument(
+        "--namelist",
+        type=str,
+        help="Output path for the rendered namelist.",
+    )
+
+    args = parser.parse_args()
+
+    main(args)
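For a quick local check, the script can be invoked as in the generate_mec_namelist rule; a sketch of what --steps 0/48/6 expands to inside main:

# uv run workflow/scripts/generate_mec_namelist.py \
#     --steps 0/48/6 --init_time 202001040000 \
#     --template resources/mec/namelist.jinja2 --namelist ./namelist
hours = list(range(0, 48 + 1, 6))  # _parse_steps("0/48/6"); stop is inclusive
leadtimes = ",".join(f"{h:02d}00" for h in hours)
print(leadtimes)  # 0000,0600,1200,1800,2400,3000,3600,4200,4800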