Skip to content

Commit fe532c6

Browse files
Merge pull request #201 from nsidc/daily-cli-overwrite-flag
Daily cli overwrite flag
2 parents d6671ac + 53fe172 commit fe532c6

File tree

6 files changed

+202
-3
lines changed

6 files changed

+202
-3
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# v2.1.0
2+
3+
* Add `--overwrite` flag for `ecdr daily` command.
4+
15
# v2.0.0
26

37
* Replace use of `AU_SI25` for AMSR2 brightness temperatures with

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[project]
22
name = "seaice_ecdr"
3-
version = "2.0.0"
3+
version = "2.1.0"
44

55
[tool.bumpversion]
6-
current_version = "2.0.0"
6+
current_version = "2.1.0"
77
commit = false
88
tag = false
99

seaice_ecdr/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from seaice_ecdr.constants import LOGS_DIR
88

9-
__version__ = "v2.0.0"
9+
__version__ = "v2.1.0"
1010

1111
# The standard loguru log levels, in increasing order of severity, are:
1212
# TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL

seaice_ecdr/cli/daily.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from seaice_ecdr.platforms.config import PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH
1717
from seaice_ecdr.publish_daily import publish_daily_nc_for_dates
1818
from seaice_ecdr.spillover import LAND_SPILL_ALGS
19+
from seaice_ecdr.util import clean_outputs_for_date_range
1920

2021
_THIS_DIR = Path(__file__).parent
2122

@@ -36,12 +37,22 @@ def make_25km_ecdr(
3637
no_multiprocessing: bool,
3738
resolution: ECDR_SUPPORTED_RESOLUTIONS,
3839
land_spillover_alg: LAND_SPILL_ALGS,
40+
overwrite: bool,
3941
):
42+
if overwrite:
43+
clean_outputs_for_date_range(
44+
start_date=start_date,
45+
end_date=end_date,
46+
hemisphere=hemisphere,
47+
base_output_dir=base_output_dir,
48+
)
49+
4050
# Use the default platform dates, which excludes AMSR2
4151
if no_multiprocessing:
4252
daily_intermediate_cmd = "intermediate-daily"
4353
else:
4454
daily_intermediate_cmd = "multiprocess-intermediate-daily"
55+
4556
run_cmd(
4657
f"{CLI_EXE_PATH} {daily_intermediate_cmd}"
4758
f" --start-date {start_date:%Y-%m-%d} --end-date {end_date:%Y-%m-%d}"
@@ -151,6 +162,10 @@ def make_25km_ecdr(
151162
type=click.Choice(get_args(LAND_SPILL_ALGS)),
152163
default=DEFAULT_SPILLOVER_ALG,
153164
)
165+
@click.option(
166+
"--overwrite",
167+
is_flag=True,
168+
)
154169
def cli(
155170
*,
156171
date: dt.date,
@@ -160,6 +175,7 @@ def cli(
160175
no_multiprocessing: bool,
161176
resolution: ECDR_SUPPORTED_RESOLUTIONS,
162177
land_spillover_alg: LAND_SPILL_ALGS,
178+
overwrite: bool,
163179
):
164180
if end_date is None:
165181
end_date = copy.copy(date)
@@ -178,6 +194,7 @@ def cli(
178194
no_multiprocessing=no_multiprocessing,
179195
resolution=resolution,
180196
land_spillover_alg=land_spillover_alg,
197+
overwrite=overwrite,
181198
)
182199

183200

seaice_ecdr/tests/unit/test_util.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
from seaice_ecdr.multiprocess_intermediate_daily import get_dates_by_year
1313
from seaice_ecdr.platforms.models import SUPPORTED_PLATFORM_ID
1414
from seaice_ecdr.util import (
15+
clean_outputs_for_date_range,
1516
date_range,
1617
find_standard_monthly_netcdf_files,
18+
get_complete_output_dir,
19+
get_intermediate_output_dir,
1720
get_num_missing_pixels,
1821
nrt_daily_filename,
1922
nrt_monthly_filename,
@@ -323,3 +326,91 @@ def test_raise_error_for_dates():
323326
# If one or more dates are passed, an error should be raised.
324327
with pytest.raises(RuntimeError):
325328
raise_error_for_dates(error_dates=[dt.date(2011, 1, 1)])
329+
330+
331+
def test_clean_outputs_for_date_range(fs):
    """Cleaning a date range removes only the files tied to that range.

    One file per day (2024-12-26 through 2025-01-08) is created in both the
    complete and the intermediate output directories via the ``fs`` fixture
    (presumably pyfakefs's fake filesystem -- confirm against the test
    configuration). After cleaning 2025-01-01 through 2025-01-02:

    * the complete directory should have lost only the two target dates;
    * the intermediate directory should retain only the first and last
      files, i.e. everything within five days of the target range is gone.
    """
    hemisphere = NORTH
    base_output_dir = Path("/output/")
    start_date = dt.date(2025, 1, 1)
    end_date = dt.date(2025, 1, 2)

    # Fourteen consecutive days: 2024-12-26 through 2025-01-08.
    file_dates = [dt.date(2024, 12, 26) + dt.timedelta(days=n) for n in range(14)]

    def _files_in(directory, dates):
        # Build the per-date file paths used throughout this test.
        return [
            directory / f"sic_psn12.5_{date:%Y%m%d}_am2_v06r00.nc" for date in dates
        ]

    complete_output_dir = get_complete_output_dir(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )
    for complete_file in _files_in(complete_output_dir, file_dates):
        fs.create_file(complete_file)

    intermediate_output_dir = get_intermediate_output_dir(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )
    for intermediate_file in _files_in(intermediate_output_dir, file_dates):
        fs.create_file(intermediate_file)

    clean_outputs_for_date_range(
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
        start_date=start_date,
        end_date=end_date,
    )

    # Complete files: everything except the two target dates survives.
    expected_complete_files = _files_in(
        complete_output_dir,
        [date for date in file_dates if not start_date <= date <= end_date],
    )
    # Intermediate files: only the outermost dates survive.
    expected_intermediate_files = _files_in(
        intermediate_output_dir,
        [file_dates[0], file_dates[-1]],
    )

    actual_complete_files = set(complete_output_dir.rglob("*.nc"))
    actual_intermediate_files = set(intermediate_output_dir.rglob("*.nc"))

    assert actual_complete_files == set(expected_complete_files)
    assert actual_intermediate_files == set(expected_intermediate_files)

seaice_ecdr/util.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
import pandas as pd
88
import xarray as xr
9+
from loguru import logger
910
from pm_tb_data._types import Hemisphere
1011

1112
from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS
@@ -360,3 +361,89 @@ def get_complete_output_dir(
360361
)
361362

362363
return complete_dir
364+
365+
366+
def _clean_complete_files_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
):
    """Delete the 'complete' output file for each date in the given range.

    Files are searched for recursively beneath the directory returned by
    ``get_complete_output_dir`` using the pattern ``sic_*_<YYYYMMDD>_*.nc``.
    Dates with no matching file are skipped.

    Raises:
        RuntimeError: if more than one file matches a single date.
    """
    search_root = get_complete_output_dir(
        base_output_dir=base_output_dir,
        hemisphere=hemisphere,
    )

    for day in date_range(start_date=start_date, end_date=end_date):
        matches = list(search_root.rglob(f"sic_*_{day:%Y%m%d}_*.nc"))
        if not matches:
            # No file exists for this date, so there is nothing to remove.
            continue

        if len(matches) > 1:
            raise RuntimeError(
                "Found multiple matching 'complete' files. Expected only 1."
                f" ({matches})"
            )

        (stale_file,) = matches
        logger.info(f"Cleaning up {stale_file}")
        stale_file.unlink()
388+
389+
390+
def _clean_intermediate_files_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
    interp_range: int = 5,
):
    """Delete intermediate files for the date range, padded on both sides.

    The range is widened by ``interp_range`` days before ``start_date`` and
    after ``end_date`` to account for temporal interpolation at the edges.
    Every file found recursively beneath the directory returned by
    ``get_intermediate_output_dir`` that matches ``*_<YYYYMMDD>_*.nc`` for a
    date in the widened range is removed.
    """
    search_root = get_intermediate_output_dir(
        base_output_dir=base_output_dir,
        hemisphere=hemisphere,
    )

    # Widen the window so neighbours used for temporal interpolation are
    # cleaned up as well.
    padding = dt.timedelta(days=interp_range)
    window_start = start_date - padding
    window_end = end_date + padding

    for day in date_range(start_date=window_start, end_date=window_end):
        glob_pattern = f"*_{day:%Y%m%d}_*.nc"
        # Materialize the matches before deleting anything.
        for stale_file in list(search_root.rglob(glob_pattern)):
            logger.info(f"Cleaning up {stale_file}")
            stale_file.unlink()
411+
412+
413+
def clean_outputs_for_date_range(
    *,
    base_output_dir: Path,
    hemisphere: Hemisphere,
    start_date: dt.date,
    end_date: dt.date,
    interp_range: int = 5,
):
    """Remove all outputs related to the given range of dates.

    * Removes the complete, published data file for each date in the range.
    * Removes all intermediate files for each date in the range, plus the
      intermediate files for adjacent dates (``interp_range`` days on either
      side) to account for temporal interpolation.

    This function DOES NOT account for melt onset, which could draw data from
    a much longer timeseries of intermediate files. This function is primarily
    aimed at supporting daily data processing of G02202 and G10016. Because
    both are produced on a daily basis, any data delays/gaps that are filled
    later need to be easy to regenerate with the new data. Cleaning ensures
    that intermediate files (which may show all NaN for dates that had been
    missing) are removed and that the final published output file uses the
    latest available data.
    """
    # Published ("complete") files: exactly the requested dates.
    _clean_complete_files_for_date_range(
        start_date=start_date,
        end_date=end_date,
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )

    # Intermediate files: the requested dates plus the interpolation margin.
    _clean_intermediate_files_for_date_range(
        start_date=start_date,
        end_date=end_date,
        interp_range=interp_range,
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )

0 commit comments

Comments
 (0)