Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# v2.1.0

* Add `--overwrite` flag for `ecdr daily` command.

# v2.0.0

* Replace use of `AU_SI25` for AMSR2 brightness temperatures with
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[project]
name = "seaice_ecdr"
version = "2.0.0"
version = "2.1.0"

[tool.bumpversion]
current_version = "2.0.0"
current_version = "2.1.0"
commit = false
tag = false

Expand Down
2 changes: 1 addition & 1 deletion seaice_ecdr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from seaice_ecdr.constants import LOGS_DIR

__version__ = "v2.0.0"
__version__ = "v2.1.0"

# The standard loguru log levels, in increasing order of severity, are:
# TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL
Expand Down
17 changes: 17 additions & 0 deletions seaice_ecdr/cli/daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from seaice_ecdr.platforms.config import PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH
from seaice_ecdr.publish_daily import publish_daily_nc_for_dates
from seaice_ecdr.spillover import LAND_SPILL_ALGS
from seaice_ecdr.util import clean_outputs_for_date_range

_THIS_DIR = Path(__file__).parent

Expand All @@ -36,12 +37,22 @@ def make_25km_ecdr(
no_multiprocessing: bool,
resolution: ECDR_SUPPORTED_RESOLUTIONS,
land_spillover_alg: LAND_SPILL_ALGS,
overwrite: bool,
):
if overwrite:
clean_outputs_for_date_range(
start_date=start_date,
end_date=end_date,
hemisphere=hemisphere,
base_output_dir=base_output_dir,
)

# Use the default platform dates, which excludes AMSR2
if no_multiprocessing:
daily_intermediate_cmd = "intermediate-daily"
else:
daily_intermediate_cmd = "multiprocess-intermediate-daily"

run_cmd(
f"{CLI_EXE_PATH} {daily_intermediate_cmd}"
f" --start-date {start_date:%Y-%m-%d} --end-date {end_date:%Y-%m-%d}"
Expand Down Expand Up @@ -151,6 +162,10 @@ def make_25km_ecdr(
type=click.Choice(get_args(LAND_SPILL_ALGS)),
default=DEFAULT_SPILLOVER_ALG,
)
@click.option(
"--overwrite",
is_flag=True,
)
def cli(
*,
date: dt.date,
Expand All @@ -160,6 +175,7 @@ def cli(
no_multiprocessing: bool,
resolution: ECDR_SUPPORTED_RESOLUTIONS,
land_spillover_alg: LAND_SPILL_ALGS,
overwrite: bool,
):
if end_date is None:
end_date = copy.copy(date)
Expand All @@ -178,6 +194,7 @@ def cli(
no_multiprocessing=no_multiprocessing,
resolution=resolution,
land_spillover_alg=land_spillover_alg,
overwrite=overwrite,
)


Expand Down
91 changes: 91 additions & 0 deletions seaice_ecdr/tests/unit/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
from seaice_ecdr.multiprocess_intermediate_daily import get_dates_by_year
from seaice_ecdr.platforms.models import SUPPORTED_PLATFORM_ID
from seaice_ecdr.util import (
clean_outputs_for_date_range,
date_range,
find_standard_monthly_netcdf_files,
get_complete_output_dir,
get_intermediate_output_dir,
get_num_missing_pixels,
nrt_daily_filename,
nrt_monthly_filename,
Expand Down Expand Up @@ -323,3 +326,91 @@ def test_raise_error_for_dates():
# If one or more dates are passed, an error should be raised.
with pytest.raises(RuntimeError):
raise_error_for_dates(error_dates=[dt.date(2011, 1, 1)])


def test_clean_outputs_for_date_range(fs):
    """Check which files survive `clean_outputs_for_date_range`.

    Fourteen consecutive daily files (2024-12-26 through 2025-01-08) are
    created in both the complete and the intermediate output directories of a
    fake filesystem. Cleaning 2025-01-01 through 2025-01-02 is expected to:

    * remove only the two target dates from the complete directory, and
    * remove everything except the first and last day from the intermediate
      directory.
    """
    hemisphere = NORTH
    start_date = dt.date(2025, 1, 1)
    end_date = dt.date(2025, 1, 2)
    base_output_dir = Path("/output/")

    # One filename per day, 2024-12-26 .. 2025-01-08 inclusive.
    first_day = dt.date(2024, 12, 26)
    days = [first_day + dt.timedelta(days=offset) for offset in range(14)]
    filenames = [f"sic_psn12.5_{day:%Y%m%d}_am2_v06r00.nc" for day in days]

    complete_output_dir = get_complete_output_dir(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )
    for filename in filenames:
        fs.create_file(complete_output_dir / filename)

    intermediate_output_dir = get_intermediate_output_dir(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )
    for filename in filenames:
        fs.create_file(intermediate_output_dir / filename)

    clean_outputs_for_date_range(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
        start_date=start_date,
        end_date=end_date,
    )

    # Only the two requested dates should disappear from the complete outputs.
    target_filenames = {
        "sic_psn12.5_20250101_am2_v06r00.nc",
        "sic_psn12.5_20250102_am2_v06r00.nc",
    }
    expected_complete_files = {
        complete_output_dir / filename
        for filename in filenames
        if filename not in target_filenames
    }

    # Only the outermost days should remain among the intermediate outputs.
    expected_intermediate_files = {
        intermediate_output_dir / "sic_psn12.5_20241226_am2_v06r00.nc",
        intermediate_output_dir / "sic_psn12.5_20250108_am2_v06r00.nc",
    }

    actual_complete_files = set(complete_output_dir.rglob("*.nc"))
    actual_intermediate_files = set(intermediate_output_dir.rglob("*.nc"))

    assert actual_complete_files == expected_complete_files
    assert actual_intermediate_files == expected_intermediate_files
87 changes: 87 additions & 0 deletions seaice_ecdr/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import pandas as pd
import xarray as xr
from loguru import logger
from pm_tb_data._types import Hemisphere

from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS
Expand Down Expand Up @@ -360,3 +361,89 @@ def get_complete_output_dir(
)

return complete_dir


def _clean_complete_files_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
):
    """Delete the 'complete' output file for each date in a date range.

    The directory returned by `get_complete_output_dir` is searched
    recursively for files named `sic_*_<YYYYMMDD>_*.nc`, once per date produced
    by `date_range(start_date=..., end_date=...)`. Dates with no matching file
    are skipped.

    Raises:
        RuntimeError: if more than one file matches a single date.
    """
    search_root = get_complete_output_dir(
        base_output_dir=base_output_dir,
        hemisphere=hemisphere,
    )

    for day in date_range(start_date=start_date, end_date=end_date):
        matches = list(search_root.rglob(f"sic_*_{day:%Y%m%d}_*.nc"))

        # Nothing was produced for this date, so there is nothing to remove.
        if not matches:
            continue

        if len(matches) != 1:
            raise RuntimeError(
                "Found multiple matching 'complete' files. Expected only 1."
                f" ({matches})"
            )

        (stale_file,) = matches
        logger.info(f"Cleaning up {stale_file}")
        stale_file.unlink()


def _clean_intermediate_files_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
    interp_range: int = 5,
):
    """Delete intermediate files for a date range widened by `interp_range`.

    The requested range is extended by `interp_range` days on both sides to
    account for temporal interpolation at the edges. For every date in the
    widened range, each file matching `*_<YYYYMMDD>_*.nc` anywhere under the
    directory returned by `get_intermediate_output_dir` is removed.
    """
    search_root = get_intermediate_output_dir(
        base_output_dir=base_output_dir,
        hemisphere=hemisphere,
    )

    # Account for temporal interpolation at the edges.
    padding = dt.timedelta(days=interp_range)
    padded_start = start_date - padding
    padded_end = end_date + padding

    for day in date_range(start_date=padded_start, end_date=padded_end):
        # Collect the matches before deleting anything for this date.
        stale_files = list(search_root.rglob(f"*_{day:%Y%m%d}_*.nc"))
        for stale_file in stale_files:
            logger.info(f"Cleaning up {stale_file}")
            stale_file.unlink()


def clean_outputs_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
    interp_range: int = 5,
):
    """Remove all outputs related to a range of dates.

    * Removes the complete, published data file for each date in the range.
    * Removes all intermediate files for those dates, plus the intermediate
      files for `interp_range` days on either side of the range, to account
      for temporal interpolation.

    This function DOES NOT account for melt onset, which could draw data from
    a much longer timeseries of intermediate files. This function is primarily
    aimed at supporting daily data processing of G02202 and G10016. Because
    both are produced on a daily basis, any data delays/gaps that later get
    filled need to be easily filled with the new data. Cleaning ensures that
    intermediate files (which may show all NaN for dates that had been
    missing) are removed and that the final published output file uses the
    latest available data.
    """
    # Complete files go first; if that step raises, the intermediate files are
    # left untouched.
    _clean_complete_files_for_date_range(
        start_date=start_date,
        end_date=end_date,
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )

    _clean_intermediate_files_for_date_range(
        start_date=start_date,
        end_date=end_date,
        interp_range=interp_range,
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )
Loading