Replies: 1 comment 12 replies
-
2026-03-31 Chrysalis
We're testing the
What changes are we testing?
The latest expected results are based on the (2026-03-27 test) run.
Testing process
Set up environments
lcrc_conda # Activate conda
lcrc-quota # Have enough space
# Set up e3sm_diags env
cd ~/ez/e3sm_diags
git status
# On branch main
# nothing to commit, working tree clean
git fetch upstream main
git reset --hard upstream/main
git log --oneline | head -n 1
# b1e98504 Fix Python 3.14 stall due to repeated climo opens without close (#1042)
# Good, matches https://github.com/E3SM-Project/e3sm_diags/commits/main
rm -rf build
conda clean --all --y
conda env create -f conda-env/dev.yml -n test-diags-main-20260331
conda activate test-diags-main-20260331
python -m pip install .
# Set up mpas_analysis env for the first time
cd ~/ez/MPAS-Analysis/
git status
# On branch develop
# nothing to commit, working tree clean
git fetch upstream develop
git reset --hard upstream/develop
git log --oneline | head -n 1
# 0eb9ef491 Merge branch 'xylar/fix-non-editable-install' into develop
# Good, matches https://github.com/MPAS-Dev/MPAS-Analysis/commits/develop/
rm -rf build
conda clean --all --y
head -n 2 dev-spec.txt
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
conda create --name test-mpas-analysis-develop-20260331 --file dev-spec.txt
conda activate test-mpas-analysis-develop-20260331
python -m pip install .
# LIVVKit
# We have no instructions to create our own livvkit environment
# We will simply use the provided one:
# source /lcrc/group/e3sm/livvkit/software/load_e3sm_unified_1.12.1_lex.sh
# Set up zppy-interfaces env
cd ~/ez/zppy-interfaces
git status
# On branch main
# nothing to commit, working tree clean
git fetch upstream main
git reset --hard upstream/main
git log --oneline | head -n 1
# 02638f4 Add Python 3.14 support (#44)
# Good, matches https://github.com/E3SM-Project/zppy-interfaces/commits/main
rm -rf build
conda clean --all --y
conda env create -f conda/dev.yml -n test-zi-main-20260331
conda activate test-zi-main-20260331
python -m pip install .
pytest tests/unit/global_time_series/test_*.py # 10 passed in 21.89s
pytest tests/unit/pcmdi_diags/test_*.py # 7 passed in 8.88s
zppy run
# zppy itself #################################################################
cd ~/ez/zppy
git status
# On branch fix-legacy-310-tests
# nothing to commit, working tree clean
git fetch upstream main
git checkout -b test-zppy-main-20260331 upstream/main
git log --oneline | head -n 1
# 0ad4774a Fix legacy 3.1.0 www paths (#801)
# Good, matches https://github.com/E3SM-Project/zppy/commits/main
git log --oneline | head -n 3
# 0ad4774a Fix legacy 3.1.0 www paths (#801)
# 30c2cee0 Add mpas_analysis_subsection parameter to global_time_series (#788)
# 016d220b Merge pull request #783 from mkstratos/add-livvkit
rm -rf build
conda clean --all --y
conda env create -f conda/dev.yml -n test-zppy-main-20260331
conda activate test-zppy-main-20260331
pre-commit run --all-files
python -m pip install .
pytest tests/test_*.py # 44 passed in 0.89s
# Edit tests/integration/utils.py:
TEST_SPECIFICS: Dict[str, Any] = {
# These are custom environment_commands for specific tasks.
# Never set these to "", because they will print the line
# `environment_commands = ""` for the corresponding task,
# thus overriding the value set higher up in the cfg.
# That is, there will be no environment set.
# (`environment_commands = ""` only redirects to Unified
# if specified under the [default] task)
"diags_environment_commands": "source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-diags-main-20260331",
"mpas_analysis_environment_commands": "source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-mpas-analysis-develop-20260331",
"global_time_series_environment_commands": "source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-zi-main-20260331",
"livvkit_environment_commands": "source /lcrc/group/e3sm/livvkit/software/load_e3sm_unified_1.12.1_lex.sh",
"pcmdi_diags_environment_commands": "source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-zi-main-20260331",
# This is the environment setup for other tasks.
# Leave as "" to use the latest Unified environment.
"environment_commands": "source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_chrysalis.sh",
# For a complete test, run the set of latest cfgs and at least one set of legacy cfgs
"cfgs_to_run": [
"weekly_bundles",
"weekly_comprehensive_v2",
"weekly_comprehensive_v3",
"weekly_legacy_3.1.0_bundles",
"weekly_legacy_3.1.0_comprehensive_v2",
"weekly_legacy_3.1.0_comprehensive_v3",
"weekly_legacy_3.0.0_bundles",
"weekly_legacy_3.0.0_comprehensive_v2",
"weekly_legacy_3.0.0_comprehensive_v3",
],
"tasks_to_run": [
"e3sm_diags",
"mpas_analysis",
"global_time_series",
"ilamb",
"livvkit",
"pcmdi_diags",
],
"unique_id": "zppy_main_branch_test_20260331",
}
git diff # Diff looks good
python tests/integration/utils.py
# CFG FILES HAVE BEEN GENERATED FROM TEMPLATES WITH THESE SETTINGS:
# UNIQUE_ID=zppy_main_branch_test_20260331
# diags_environment_commands=source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-diags-main-20260331
# mpas_analysis_environment_commands=source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-mpas-analysis-develop-20260331
# global_time_series_environment_commands=source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-zi-main-20260331
# livvkit_environment_commands=source /lcrc/group/e3sm/livvkit/software/load_e3sm_unified_1.12.1_lex.sh
# pcmdi_diags_environment_commands=source /gpfs/fs1/home/ac.forsyth2/miniforge3/etc/profile.d/conda.sh; conda activate test-zi-main-20260331
# environment_commands=source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_chrysalis.sh
# Reminder: `environment_commands=''` => the latest E3SM Unified environment will be used
# For reference:
alias sq
# alias sq='sqa -u ac.forsyth2'
alias sqa
# alias sqa='squeue -o "%8u %.7a %.4D %.9P %7i %.2t %.10r %.10M %.10l %j" --sort=P,-t,-p'
sq
# No jobs currently queued
zppy -c tests/integration/generated/test_weekly_bundles_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_comprehensive_v2_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_comprehensive_v3_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.1.0_bundles_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.1.0_comprehensive_v2_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.1.0_comprehensive_v3_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.0.0_bundles_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.0.0_comprehensive_v2_chrysalis.cfg
zppy -c tests/integration/generated/test_weekly_legacy_3.0.0_comprehensive_v3_chrysalis.cfg
sq | wc -l # Tue 3/31 15:54 => 231 - header row = 230 jobs
sq | wc -l # Tue 3/31 20:52 => 20 - header row = 19 jobs
sq
# ac.forsy e3sm 4 compute 1209408 R None 4:58:21 7:00:00 bundle1
# ac.forsy e3sm 4 compute 1209328 R None 4:58:51 7:00:00 bundle1
# ac.forsy e3sm 4 compute 1209239 R None 4:59:51 7:00:00 bundle1
# ac.forsy e3sm 1 compute 1209433 R None 4:34:21 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983
# ac.forsy e3sm 1 compute 1209434 R None 4:34:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981
# ac.forsy e3sm 1 compute 1209353 R None 4:36:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983
# ac.forsy e3sm 1 compute 1209354 R None 4:37:21 5:00:00 e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981
# ac.forsy e3sm 1 compute 1209463 R None 4:37:21 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988
# ac.forsy e3sm 1 compute 1209264 R None 4:38:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983
# ac.forsy e3sm 1 compute 1209265 R None 4:38:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981
# ac.forsy e3sm 1 compute 1209389 R None 4:39:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988
# ac.forsy e3sm 1 compute 1209308 R None 4:41:21 5:00:00 e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1987-1988_vs_1985-1986
# ac.forsy e3sm 1 compute 1209307 R None 4:41:51 5:00:00 e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988
# ac.forsy e3sm 1 compute 1209465 R None 4:43:21 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986
# ac.forsy e3sm 1 compute 1209435 R None 4:45:21 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981
# ac.forsy e3sm 1 compute 1209391 R None 4:47:51 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986
# ac.forsy e3sm 1 compute 1209355 R None 4:49:51 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981
# ac.forsy e3sm 1 compute 1209309 R None 4:54:21 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986
# ac.forsy e3sm 1 compute 1209266 R None 4:56:21 5:00:00 e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981
# Something seems wrong; why are all these tasks taking 4+ hours?
# Picking up 4/1 ##############################################################
sq | wc -l # Wed 4/1 08:04 => 1 - header row = 0 jobs
Bundles part 2
# Check on bundles status
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_bundles_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# bundle1.status:RUNNING 1209239
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1985-1986.status:RUNNING 1209239
tail bundle1.o1209239
# 2026-03-31 18:04:35,013 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 18:04:35,019 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'PRECT' for test climatology dataset.
# 2026-03-31 18:04:35,167 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:863) >> Deriving the ref climatology variable using the source variables: ('sat_gauge_precip',)
# 2026-03-31 18:04:35,175 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 18:04:35,177 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'PRECT' for ref climatology dataset.
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# slurmstepd: error: *** STEP 1209239.358 ON chr-0208 CANCELLED AT 2026-04-01T00:52:58 DUE TO TIME LIMIT ***
# slurmstepd: error: *** JOB 1209239 ON chr-0208 CANCELLED AT 2026-04-01T00:52:58 DUE TO TIME LIMIT ***
# srun: got SIGCONT
# srun: forcing job termination
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.1.0_bundles_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# bundle1.status:RUNNING 1209328
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1985-1986.status:RUNNING 1209328
tail bundle1.o1209328
# 2026-03-31 18:04:30,373 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'U' for ref climatology dataset.
# 2026-03-31 18:04:31,916 [INFO]: polar_driver.py(_run_diags_3d:209) >> Selected pressure level(s): [850.0]
# 2026-03-31 18:04:32,804 [INFO]: regrid.py(subset_and_align_datasets:70) >> Selected region: polar_S
# 2026-03-31 18:04:38,285 [INFO]: io.py(_save_data_metrics_and_plots:167) >> Metrics saved in model_vs_obs_1985-1986/polar/ERA5/ERA5-U-850-DJF-polar_S.json
# 2026-03-31 18:04:42,601 [INFO]: utils.py(_save_main_plot:122) >> Plot saved in: model_vs_obs_1985-1986/polar/ERA5/ERA5-U-850-DJF-polar_S.png
# slurmstepd: error: *** STEP 1209328.358 ON chr-0081 CANCELLED AT 2026-04-01T00:53:58 DUE TO TIME LIMIT ***
# slurmstepd: error: *** JOB 1209328 ON chr-0081 CANCELLED AT 2026-04-01T00:53:58 DUE TO TIME LIMIT ***
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# srun: got SIGCONT
# srun: forcing job termination
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.0.0_bundles_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# bundle1.status:RUNNING 1209408
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1985-1986.status:RUNNING 1209408
tail bundle1.o1209408
# 2026-03-31 18:06:33,748 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 18:06:33,750 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'PRECT' for ref climatology dataset.
# 2026-03-31 18:06:35,225 [INFO]: regrid.py(subset_and_align_datasets:70) >> Selected region: polar_N
# 2026-03-31 18:06:37,094 [INFO]: io.py(_save_data_metrics_and_plots:167) >> Metrics saved in model_vs_obs_1985-1986/polar/GPCP_v3.2/GPCP_v3.2-PRECT-ANN-polar_N.json
# 2026-03-31 18:06:40,883 [INFO]: utils.py(_save_main_plot:122) >> Plot saved in: model_vs_obs_1985-1986/polar/GPCP_v3.2/GPCP_v3.2-PRECT-ANN-polar_N.png
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# slurmstepd: error: *** STEP 1209408.358 ON chr-0087 CANCELLED AT 2026-04-01T00:54:28 DUE TO TIME LIMIT ***
# slurmstepd: error: *** JOB 1209408 ON chr-0087 CANCELLED AT 2026-04-01T00:54:28 DUE TO TIME LIMIT ***
# srun: got SIGCONT
# srun: forcing job termination
# Can't proceed with running bundles part 2.
Review finished runs
### v2 ###
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v2_output/zppy_main_branch_test_20260331/v2.LR.historical_0201/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.status:RUNNING 1209264
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.status:RUNNING 1209265
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.status:RUNNING 1209266
tail e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.o1209264
# 2026-03-31 19:12:35,703 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 19:12:35,710 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'TAUXY' for test climatology dataset.
# 2026-03-31 19:12:35,784 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:863) >> Deriving the ref climatology variable using the source variables: ('tauu', 'tauv')
# 2026-03-31 19:12:35,813 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 19:12:35,822 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'TAUXY' for ref climatology dataset.
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# slurmstepd: error: *** STEP 1209264.0 ON chr-0096 CANCELLED AT 2026-03-31T23:13:57 DUE TO TIME LIMIT ***
# slurmstepd: error: *** JOB 1209264 ON chr-0096 CANCELLED AT 2026-03-31T23:13:57 DUE TO TIME LIMIT ***
# srun: got SIGCONT
# srun: forcing job termination
tail e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.o1209265
# 2026-03-31 18:28:23,355 [INFO]: polar_driver.py(run_diag:41) >> Variable: FLDS
# 2026-03-31 18:28:27,492 [INFO]: regrid.py(subset_and_align_datasets:70) >> Selected region: polar_S
# 2026-03-31 18:28:27,697 [INFO]: io.py(_save_data_metrics_and_plots:167) >> Metrics saved in model_vs_model_1980-1981/polar/model_vs_model/v2.LR.historical_0201-FLDS-DJF-polar_S.json
# 2026-03-31 18:28:28,745 [INFO]: utils.py(_save_main_plot:122) >> Plot saved in: model_vs_model_1980-1981/polar/model_vs_model/v2.LR.historical_0201-FLDS-DJF-polar_S.png
# 2026-03-31 18:28:28,745 [INFO]: polar_driver.py(run_diag:41) >> Variable: FLDS
# slurmstepd: error: *** STEP 1209265.0 ON chr-0253 CANCELLED AT 2026-03-31T23:13:57 DUE TO TIME LIMIT ***
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# slurmstepd: error: *** JOB 1209265 ON chr-0253 CANCELLED AT 2026-03-31T23:13:57 DUE TO TIME LIMIT ***
# srun: got SIGCONT
# srun: forcing job termination
tail e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.o1209266
# 2026-03-31 18:01:16,035 [INFO]: dataset_xr.py(_get_land_sea_mask:1627) >> No land sea mask datasets were found for the given season.
# 2026-03-31 18:01:16,036 [INFO]: dataset_xr.py(_get_default_land_sea_mask_dataset:1674) >> Using default land sea mask located at `/gpfs/fs1/home/ac.forsyth2/miniforge3/envs/test-diags-main-20260331/share/e3sm_diags/acme_ne30_ocean_land_mask.nc`.
# 2026-03-31 18:01:16,224 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:863) >> Deriving the ref climatology variable using the source variables: ('FIRA_U',)
# 2026-03-31 18:01:16,279 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:870) >> Getting dataset with derivation function
# 2026-03-31 18:01:16,281 [INFO]: dataset_xr.py(_get_dataset_with_derived_climo_var:877) >> Successfully derived variable 'FIRA_U' for ref climatology dataset.
# srun: Job step aborted: Waiting up to 92 seconds for job step to finish.
# slurmstepd: error: *** STEP 1209266.0 ON chr-0121 CANCELLED AT 2026-03-31T22:56:27 DUE TO TIME LIMIT ***
# slurmstepd: error: *** JOB 1209266 ON chr-0121 CANCELLED AT 2026-03-31T22:56:27 DUE TO TIME LIMIT ***
# srun: got SIGCONT
# srun: forcing job termination
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.1.0_comprehensive_v2_output/zppy_main_branch_test_20260331/v2.LR.historical_0201/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.status:RUNNING 1209353
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.status:RUNNING 1209354
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.status:RUNNING 1209355
grep -l -n "DUE TO TIME LIMIT" *.o*
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.o1209353
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.o1209354
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.o1209355
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.0.0_comprehensive_v2_output/zppy_main_branch_test_20260331/v2.LR.historical_0201/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.status:RUNNING 1209433
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.status:RUNNING 1209434
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.status:RUNNING 1209435
grep -l -n "DUE TO TIME LIMIT" *.o*
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1982-1983.o1209433
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1980-1981_vs_1980-1981.o1209434
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1982-1983_vs_1980-1981.o1209435
### v3 ###
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.status:RUNNING 1209307
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1987-1988_vs_1985-1986.status:RUNNING 1209308
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.status:RUNNING 1209309
grep -l -n "DUE TO TIME LIMIT" *.o*
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.o1209307
# e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1987-1988_vs_1985-1986.o1209308
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.o1209309
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.1.0_comprehensive_v3_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.status:RUNNING 1209389
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.status:RUNNING 1209391
grep -l -n "DUE TO TIME LIMIT" *.o*
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.o1209389
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.o1209391
cd /lcrc/group/e3sm/ac.forsyth2/zppy_weekly_legacy_3.0.0_comprehensive_v3_output/zppy_main_branch_test_20260331/v3.LR.historical_0051/post/scripts
grep -v "OK" *status
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.status:RUNNING 1209463
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.status:RUNNING 1209465
grep -l -n "DUE TO TIME LIMIT" *.o*
# e3sm_diags_atm_monthly_180x360_aave_model_vs_obs_1987-1988.o1209463
# e3sm_diags_lnd_monthly_mvm_lnd_model_vs_model_1987-1988_vs_1985-1986.o1209465
(Note my test doesn't include the latest
Results analysis
I gave my time limit errors and ChatGPT response
Short answer: the “fix” removed a resource leak, but it also destroyed all the implicit caching/reuse that was making things fast — so now you’re re-opening and re-reading NetCDF data over and over, which blows up runtime and hits the wall clock limit.
What changed (the critical behavior shift)
The diff aggressively closes datasets everywhere:
This is consistent with the intent noted in the docstring:
So the design goal was:
Why this caused the slowdown
1. You killed xarray/dask’s lazy reuse
Previously:
Now:
👉 Net effect: massive repeated I/O
2.
|
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
I'm starting the testing log for E3SM Unified 1.13.0 (as a follow-up to the 1.12.0 log) rather than using the regular (weekly log).
However, in the beginning threads we'll still be using dev environments. The idea is that these will set the expected results (on Chrysalis, Perlmutter, Compy) to use during the Unified 1.13.0 testing period.
Beta Was this translation helpful? Give feedback.
All reactions