Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c1375ab
Initial draft (pseudo code)
dnerini Oct 7, 2025
9f608f2
add namelist as resource
Nov 13, 2025
e82bd94
add verif_obs.smk to Snakefile
Nov 13, 2025
c3ab651
Add rules for observation data and namelist generation (using fake data)
Nov 13, 2025
7512d96
add newline to namelist template
Nov 13, 2025
13301a5
somewhat working version of run_mec (with fake data)
Nov 13, 2025
e722e5f
correct typo and add optional script for generating namelist, in case…
Nov 24, 2025
3d9e3c1
fix: add localrule to inference_interpolator rule (#57)
frazane Oct 8, 2025
918913f
Fix for interpolator rule
OpheliaMiralles Oct 8, 2025
179eb4d
Consolidate multi packages into unique src/ dir (#58)
dnerini Oct 14, 2025
e791a30
Update configs (#63)
OpheliaMiralles Oct 15, 2025
d197712
Adopt 'steps' instead of 'lead_time' (#62)
dnerini Oct 20, 2025
9568987
Update example config for experiment with interpolators (#70)
dnerini Oct 20, 2025
128eb91
Distinguish between primary runs ('candidates') and secondary runs (#64)
dnerini Oct 20, 2025
6315afc
Adopt forecast intervals including the end point (#71)
dnerini Oct 21, 2025
e028f59
Mrb 550 inconcsistent forecast initializations in evalml (#72)
jonasbhend Oct 21, 2025
5406777
Update vega-lite spec (#69)
frazane Oct 22, 2025
8d01490
Decouple inference preparation and execution (#68)
frazane Oct 22, 2025
b7b1311
Scores by Region (#75)
jonasbhend Oct 29, 2025
04c4cf1
input data and namelist for MEC
Dec 11, 2025
b1959dc
Merge remote-tracking branch 'origin/main' into MRB-534-Implement-rul…
dnerini Dec 11, 2025
23c9599
Cleanup
dnerini Dec 11, 2025
804455a
Refactor MEC namelist generation
Dec 11, 2025
f793d85
setup MEC case
Dec 18, 2025
3839476
add use of local MEC executable and cleaning
Jan 7, 2026
5b58b7a
Support of mec in a sarus container
Jan 8, 2026
292878d
target final feedback files
Jan 15, 2026
09f06da
Fix linting
Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions config/forecasters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ description: |
Evaluate skill of COSMO-E emulator (M-1 forecaster).

dates:
- 2020-02-03T00:00 # Storm Petra
- 2020-02-07T00:00 # Storm Sabine
- 2020-10-01T00:00 # Storm Brigitte
start: 2020-01-04T00:00
end: 2020-01-05T00:00
frequency: 6h

runs:
- forecaster:
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
label: M-1 forecaster
steps: 0/120/6
steps: 0/48/6
config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
extra_dependencies:
- git+https://github.com/ecmwf/anemoi-inference.git@main
Expand All @@ -21,7 +21,7 @@ baselines:
baseline_id: COSMO-E
label: COSMO-E
root: /store_new/mch/msopr/ml/COSMO-E
steps: 0/120/6
steps: 0/48/6

analysis:
label: COSMO KENDA
Expand Down
76 changes: 76 additions & 0 deletions resources/mec/namelist.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
!==============================================================================
! namelist template for MEC
!==============================================================================

!===================
! general parameters
!===================
&run
method = 'GMESTAT' ! Model Equivalent Calculator
model = 'ML' ! forecast model. One of "COSMO" "ICON" "ML"
input = './input_mod' ! input data path
data = '/oprusers/osm/opr.emme/data/' ! data path for auxiliary data
obsinput = './input_obs' ! observation input data path
output = '.' ! output data to working directory
time_ana = {{ init_time }}00 ! analysis date YYYYMMDDHHMMSS
read_fields = 'ps u t v q geof t2m td2m u_10m v_10m'
grib_edition = 2
grib_library = 2 ! GRIB-API used: 1=GRIBEX 2=GRIB2-API
cosmo_refatm = 2 ! reference atmosphere to be used for COSMO:1or2
fc_hours = 0 ! Default is 3h. Has to be set to 0 if one wants to verify +0h leadtime
nproc1 = 1
nproc2 = 1
/

!===============================
! observation related parameters
!===============================
&observations
!---------------------------------------------------
! read from CDFIN files (if not set use mon/cof/ekf)
!---------------------------------------------------
  read_cdfin = F ! (F): don't read COSMO CDFIN files; get obs from ekf
vint_lin_t = T ! linear vertical interpolation for temperature
vint_lin_z = T ! linear vertical interpolation for geopotential
vint_lin_uv = T ! linear vertical interpolation for wind
ptop_lapse = 850.
pbot_lapse = 950.
! int_nn = T ! horizontal interpolation: nearest neighbor
/

!====================
! Ensemble parameters
!====================
&ENKF
k_enkf = 0 ! ensemble size (0 for det. run)
det_run = 1 ! set to 1 for deterministic run, 0 for ensemble
/

!================================
! Verification related parameters
!================================
&veri_obs
obstypes = "SYNOP" ! "SYNOP TEMP"
fc_times = {{ leadtimes }} ! forecast lead time at reference (hhmm) 0000,1200,2400,...
prefix_in = 'ekf' ! prefix for input files. ekf or mon
prefix_out = 'ver'
rm_old = 2 ! overwrite entries in verification file ?
fc_file = 'fc__FCR_TIME_00' ! template for forecast file name
time_range = 1
ekf_concat = F
ref_runtype = 'any' ! accept any runtype for the reference state
/

&report
time_b = -0029 ! (hhmm, inclusive)
time_e = 0030 ! (hhmm, exclusive)
/

&cosmo_obs
lcd187 = .true. ! use ground based wind lidar obs
verification_start = -29 ! (min, inclusive)
verification_end = 30 ! (min, inclusive)
/
&synop_obs
version = 1
/
19 changes: 19 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include: "rules/verif.smk"
include: "rules/report.smk"
include: "rules/plot.smk"
include: "rules/summary.smk"
include: "rules/verif_obs.smk"


# optional messages, log and error handling
Expand All @@ -38,11 +39,17 @@ onerror:
EXPERIMENT_HASH = short_hash_config()


# TODO: target final scorefile instead of FF
rule experiment_all:
"""Target rule for experiment workflow."""
input:
OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/dashboard",
OUT_ROOT / f"results/{EXPERIMENT_HASH}/metrics/plots",
expand(
OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
run_id=collect_all_candidates(),
),
expand(
OUT_ROOT / "data/runs/{run_id}/summary.md",
run_id=collect_all_candidates(),
Expand Down Expand Up @@ -101,3 +108,15 @@ rule verif_metrics_plot_all:
rules.verif_metrics_plot.output,
experiment=EXPERIMENT_HASH,
),


# To run: snakemake --cores 1 --configfile=config/forecasters.yaml verif_obs_all
# clean: rm -rf ./output/data/runs/d0846032f/202001040000/mec ./output/data/runs/d0846032f/202001040000/folder_to_delete
# init_time=202001040000, run_id=collect_all_runs(),
rule verif_obs_all:
    """Aggregate target: MEC feedback files for every candidate run and init time."""
    input:
        expand(
            rules.run_mec.output,
            # init times formatted as YYYYMMDDHHMM, matching the {init_time} wildcard
            init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
            run_id=collect_all_candidates(),
        ),
130 changes: 130 additions & 0 deletions workflow/rules/verif_obs.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from pathlib import Path
from datetime import datetime, timedelta


def get_init_times(wc):
    """List init times (YYYYMMDDHHMM strings) from init_time - lead up to init_time.

    Walks backwards from the full lead time down to zero in steps of the
    run's configured frequency (default: 12h), returning the corresponding
    absolute times in chronological order.
    """
    anchor = datetime.strptime(wc.init_time, "%Y%m%d%H%M")
    # e.g. "48h" -> 48
    lead_hours = int(str(get_leadtime(wc)).rstrip("h"))
    freq_hours = int(str(RUN_CONFIGS[wc.run_id].get("frequency", "12h")).rstrip("h"))
    offsets = range(lead_hours, -1, -freq_hours)
    return [(anchor - timedelta(hours=h)).strftime("%Y%m%d%H%M") for h in offsets]


rule collect_mec_input:
    """Stage MEC inputs: copy observation files and concatenate per-init forecast GRIBs."""
    input:
        inference_dir=rules.prepare_inference_forecaster.output.grib_out_dir,
    output:
        # run is the MEC working directory; obs/mod match the 'obsinput' and
        # 'input' paths referenced by the namelist template.
        run=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec"),
        obs=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_obs"),
        mod=directory(OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/input_mod"),
    params:
        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
        # space-separated source init times produced by get_init_times()
        init_list_str=lambda wc: " ".join(get_init_times(wc)),
        run_root=lambda wc: str(OUT_ROOT / f"data/runs/{wc.run_id}"),
    log:
        OUT_ROOT
        / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_collect_mec_input.log",
    shell:
        """
        (
        set -euo pipefail
        echo "...time at start of collect_mec_input: $(date)"

        # create the input_obs and input_mod dirs
        mkdir -p {output.run} {output.obs} {output.mod}

        # extract YYYYMM from init_time (which is YYYYMMDDHHMM) and use it in the paths
        init="{wildcards.init_time}"
        ym="${{init:0:6}}"
        ymdh="${{init:0:10}}"
        echo "init time: ${{init}}, ym: ${{ym}}"

        # collect observations (ekfSYNOP) and/or (monSYNOP from DWD; includes precip) files
        # NOTE(review): hard-coded KENDA-1 / scratch paths — confirm availability per host
        cp /store_new/mch/msopr/osm/KENDA-1/EKF/${{ym}}/ekfSYNOP_${{init}}00.nc {output.obs}/ekfSYNOP.nc
        cp /scratch/mch/paa/mec/MEC_ML_input/monFiles2020/hpc/uwork/swahl/temp/feedback/monSYNOP.${{init:0:10}} {output.obs}/monSYNOP.nc

        # For each source init (src_init) produce one output file named fc_<src_init>
        for src_init in {params.init_list_str}; do
            src_dir="{params.run_root}/$src_init/grib"
            out_file="{output.mod}/fc_$src_init"
            echo "creating $out_file from $src_dir"
            # create/truncate out_file
            : > "$out_file"
            # only concat if matching files exist
            if compgen -G "$src_dir/20*.grib" > /dev/null; then
                cat "$src_dir"/20*.grib >> "$out_file"
            else
                echo "WARNING: no grib files found in $src_dir" >&2
            fi
        done

        ls -l {output.mod} {output.obs}
        echo "...time at end of collect_mec_input: $(date)"
        ) > {log} 2>&1
        """


rule generate_mec_namelist:
    """Render the MEC namelist for one run and init time from the Jinja2 template."""
    localrule: True
    input:
        script="workflow/scripts/generate_mec_namelist.py",
        template="resources/mec/namelist.jinja2",
    output:
        namelist=OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/namelist",
    params:
        # 'start/stop/step' lead-time spec in hours, e.g. "0/48/6"
        steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"],
    shell:
        """
        uv run {input.script} \
            --steps {params.steps} \
            --init_time {wildcards.init_time} \
            --template {input.template} \
            --namelist {output.namelist}
        """


rule run_mec:
    """Run MEC (in a sarus container) and collect the SYNOP feedback file."""
    input:
        namelist=rules.generate_mec_namelist.output.namelist,
        # plain path here: the directory() flag is only valid for outputs,
        # not inputs, and triggers a Snakemake warning if used on one
        run_dir=rules.collect_mec_input.output.run,
    output:
        fdbk_file=OUT_ROOT / "data/runs/{run_id}/fdbk_files/verSYNOP_{init_time}.nc",
    resources:
        cpus_per_task=1,
        runtime="1h",
    log:
        OUT_ROOT / "data/runs/{run_id}/{init_time}/mec/{run_id}-{init_time}_run_mec.log",
    shell:
        """
        (
        set -euo pipefail
        echo "...time at start of run_mec: $(date)"

        # Run MEC inside sarus container
        # Note: pull command currently needed only once to download the container
        # sarus pull container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main
        abs_run_dir=$(realpath {input.run_dir})
        sarus run --mount=type=bind,source=$abs_run_dir,destination=/src/bin2 --mount=type=bind,source=/oprusers/osm/opr.emme/data/,destination=/oprusers/osm/opr.emme/data/ container-registry.meteoswiss.ch/mecctr/mec-container:0.1.0-main

        # Run MEC using local executable (Alternative to sarus container)
        #cd {input.run_dir}
        #export LM_HOST=balfrin-ln002
        #source /oprusers/osm/opr.emme/abs/mec.env
        #./mec > ./mec_out.log 2>&1

        # move the output file to the final location for the Feedback files
        mkdir -p {input.run_dir}/../../fdbk_files
        cp {input.run_dir}/verSYNOP.nc {input.run_dir}/../../fdbk_files/verSYNOP_{wildcards.init_time}.nc
        echo "...time at end of run_mec: $(date)"
        ) > {log} 2>&1
        """
68 changes: 68 additions & 0 deletions workflow/scripts/generate_mec_namelist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import logging
from argparse import ArgumentParser
from datetime import datetime
from pathlib import Path

import jinja2

LOG = logging.getLogger(__name__)
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)


def _parse_steps(steps: str) -> int:
# check that steps is in the format "start/stop/step"
if "/" not in steps:
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
if len(steps.split("/")) != 3:
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
start, end, step = map(int, steps.split("/"))
return list(range(start, end + 1, step))


def main(args):
    """Render the MEC namelist template and write it to ``args.namelist``.

    Each lead hour is formatted as hhmm (0 -> '0000', 6 -> '0600'; values of
    100h or more yield five digits, e.g. 120 -> '12000') and joined with
    commas for the &veri_obs fc_times entry.
    """
    leadtimes = ",".join(f"{h:02d}00" for h in args.steps)

    # Render template with init_time and computed leadtimes
    context = {"init_time": f"{args.init_time:%Y%m%d%H%M}", "leadtimes": leadtimes}
    template_path = Path(args.template)
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_path.parent)))
    template = env.get_template(template_path.name)
    namelist = template.render(**context)

    out_path = Path(str(args.namelist))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        f.write(namelist)
    # log the destination, not the full rendered content, once it exists
    LOG.info("MEC namelist written to %s", out_path)


if __name__ == "__main__":
    parser = ArgumentParser(description="Generate a MEC namelist from a Jinja2 template.")

    parser.add_argument(
        "--steps",
        type=_parse_steps,
        default="0/120/6",
        help="Lead times as 'start/stop/step' in hours (stop inclusive).",
    )

    parser.add_argument(
        "--init_time",
        type=lambda s: datetime.strptime(s, "%Y%m%d%H%M"),
        default="202010010000",
        help="Forecast initialization time in YYYYMMDDHHMM format.",
    )

    parser.add_argument(
        "--template",
        type=str,
        help="Path to the Jinja2 namelist template.",
    )

    parser.add_argument(
        "--namelist",
        type=str,
        help="Output path for the rendered namelist file.",
    )

    args = parser.parse_args()

    main(args)
Loading