Skip to content

Commit 7aa7975

Browse files
authored
Merge pull request #792 from dbekaert/timesubset_combine
Add support to subset files in time for stats analysis
2 parents ffd385b + c349554 commit 7aa7975

File tree

6 files changed

+86
-10
lines changed

6 files changed

+86
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -35,6 +35,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
3535
* [731](https://github.com/dbekaert/RAiDER/pull/731) - Fixed fetch routine for GMAO.
3636

3737
### Added
38+
* [792](https://github.com/dbekaert/RAiDER/pull/792) - Added temporal subsetting to `raiderCombine.py` workflow to more seamlessly support annual statistical analyses.
3839
* [790](https://github.com/dbekaert/RAiDER/pull/790) - Added a test in `test_interpolator.py` to put test coverage to 100% and linted file.
3940
* [789](https://github.com/dbekaert/RAiDER/pull/789) - Introduce `min_pct_days` option to filter stations based on global days percentage.
4041
* [788](https://github.com/dbekaert/RAiDER/pull/788) - Updated `variance_analysis` function to include global date tracking parameters and modified datetime handling for station start and end dates.

environment.yml

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,5 +1,5 @@
11
# create environment : conda env create -f environment.yml
2-
# update dependencies: conda env update -f environment.yml
2+
# update dependencies: conda env update -f environment.yml --prune
33
# remove environment : conda env remove -n RAiDER
44
# enter environment : conda activate RAiDER
55
# exit environment : conda deactivate
@@ -19,6 +19,7 @@ dependencies:
1919
- dask
2020
- dem_stitcher>=2.5.8
2121
- ecmwf-api-client
22+
- geopandas
2223
- h5netcdf
2324
- h5py
2425
- herbie-data<2025.2.1
@@ -30,6 +31,7 @@ dependencies:
3031
- pandas
3132
- progressbar
3233
- pydap>3.2.2
34+
- pyogrio
3335
- pyproj>=2.2.0
3436
- pyyaml
3537
- rasterio>=1.3.0

tools/RAiDER/cli/raider.py

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -710,6 +710,10 @@ def combineZTDFiles() -> None:
710710
print(f"Observation error threshold: {args.obs_errlimit}")
711711
print(f"Nan for negative σ_wm² values: {args.allow_nan_for_negative}")
712712
print(f"Min% timespan overlap to keep station: {args.min_pct_days}")
713+
print(
714+
"Subset in time by specified earliest to latest "
715+
f"YYYY-MM-DD dates: {args.timeinterval}"
716+
)
713717

714718
if not args.raider_file.exists():
715719
combineDelayFiles(args.raider_file, loc=args.raider_folder)
@@ -733,6 +737,7 @@ def combineZTDFiles() -> None:
733737
obs_errlimit=args.obs_errlimit,
734738
allow_nan_for_negative=args.allow_nan_for_negative,
735739
min_pct_days=args.min_pct_days,
740+
timeinterval=args.timeinterval,
736741
)
737742

738743

tools/RAiDER/getStationDelays.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -274,7 +274,7 @@ def get_station_data(inFile, dateList, gps_repo=None, numCPUs=8, outDir=None, re
274274
df.to_csv(name, index=False)
275275
else:
276276
logger.warning(
277-
f"Station file {name} not found likely"
277+
f"Station file {name} not found likely "
278278
"no available data in specified time span"
279279
)
280280

tools/RAiDER/gnss/processDelayFiles.py

Lines changed: 68 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -4,17 +4,19 @@
44
import glob
55
import math
66
import re
7+
import shutil
8+
from itertools import chain
79
from pathlib import Path
810
from textwrap import dedent
9-
from typing import Optional
11+
from typing import List, Optional, Union
1012

1113
# Third-party
1214
import numpy as np
1315
import pandas as pd
1416
from tqdm import tqdm
1517

1618
# Local
17-
from RAiDER.cli.parser import add_verbose, add_allow_nan_options
19+
from RAiDER.cli.parser import add_allow_nan_options, add_verbose
1820
from RAiDER.logger import logger
1921

2022

@@ -23,12 +25,25 @@
2325

2426
def combineDelayFiles(
2527
out_path: Path,
26-
loc: Path=Path.cwd(),
28+
loc: Union[List[Path], Path] = Path.cwd(),
2729
source: str='model',
2830
ext: str='.csv',
2931
ref: Optional[Path]=None,
3032
col_name: str='ZTD'
3133
) -> None:
34+
35+
# Normalize single Path to List
36+
# e.g. Path('folder') -> [Path('folder')]
37+
if isinstance(loc, Path):
38+
loc = [loc]
39+
40+
# Flatten nested lists if they exist
41+
# e.g. [[Path('A')], [Path('B')]] -> [Path('A'), Path('B')]
42+
# This checks if the list is not empty AND the first item is a list
43+
if loc and isinstance(loc[0], list):
44+
loc = list(chain.from_iterable(loc))
45+
46+
# Now 'loc' is guaranteed to be flat: [Path, Path, ...]
3247
file_paths = [f for folder in loc for f in folder.glob(f"*{ext}")]
3348

3449
if source == 'model':
@@ -38,7 +53,6 @@ def combineDelayFiles(
3853
# If single file, just copy source
3954
if len(file_paths) == 1:
4055
if source == 'model':
41-
import shutil
4256
shutil.copy(file_paths[0], out_path)
4357
else:
4458
file_paths = readZTDFile(file_paths[0], col_name=col_name)
@@ -548,6 +562,7 @@ def create_parser() -> argparse.ArgumentParser:
548562
"""),
549563
type=parse_dir,
550564
default=[Path.cwd()],
565+
nargs='+' # Forces input into a list [Path, Path...]
551566
)
552567
p.add_argument(
553568
'--gnssDir',
@@ -560,6 +575,7 @@ def create_parser() -> argparse.ArgumentParser:
560575
"""),
561576
type=parse_dir,
562577
default=[Path.cwd()],
578+
nargs='+' # Forces input into a list [Path, Path...]
563579
)
564580

565581
p.add_argument(
@@ -643,6 +659,18 @@ def create_parser() -> argparse.ArgumentParser:
643659
default=0.0,
644660
)
645661

662+
p.add_argument(
663+
'--timeinterval',
664+
'-ti',
665+
dest='timeinterval',
666+
type=str,
667+
help=dedent("""\
668+
Subset in time by specifying earliest YYYY-MM-DD date
669+
followed by latest date YYYY-MM-DD.
670+
-- Example : '2016-01-01 2019-01-01'."""),
671+
default=None,
672+
)
673+
646674
# add other args to parser
647675
add_allow_nan_options(p)
648676
add_verbose(p)
@@ -660,6 +688,7 @@ def main(
660688
obs_errlimit: float=float('inf'),
661689
allow_nan_for_negative: bool=True,
662690
min_pct_days: float=0.0,
691+
timeinterval: str=None,
663692
):
664693
"""Merge a combined RAiDER delays file with a GPS ZTD delay file."""
665694
print(f'Merging delay files {raider_file} and {ztd_file}')
@@ -668,6 +697,29 @@ def main(
668697
dfz = pd.read_csv(ztd_file, parse_dates=['Datetime'])
669698
dfr = pd.read_csv(raider_file, parse_dates=['Datetime'])
670699

700+
# time-interval filter
701+
# need to add a day buffer to account for time changes
702+
if timeinterval:
703+
# Parse the time interval string
704+
start_str, end_str = timeinterval.split()
705+
706+
# Convert to datetime objects and apply the 1-day buffer
707+
# Subtract 1 day from start, Add 1 day to end
708+
start_date = pd.to_datetime(start_str)
709+
end_date = pd.to_datetime(end_str)
710+
start_date_buffer = start_date - pd.Timedelta(days=1)
711+
end_date_buffer = end_date + pd.Timedelta(days=1)
712+
713+
# apply time filter
714+
dfz = dfz[
715+
(dfz['Datetime'] >= start_date_buffer) &
716+
(dfz['Datetime'] <= end_date_buffer)
717+
].reset_index(drop=True)
718+
dfr = dfr[
719+
(dfr['Datetime'] >= start_date_buffer) &
720+
(dfr['Datetime'] <= end_date_buffer)
721+
].reset_index(drop=True)
722+
671723
# drop extra columns from tropo delay file
672724
expected_data_columns = ['ID', 'Lat', 'Lon', 'Hgt_m', 'Datetime', 'wetDelay', 'hydroDelay', raider_delay]
673725
dfr = dfr.drop(columns=[col for col in dfr if col not in expected_data_columns])
@@ -715,6 +767,18 @@ def main(
715767
dfz = pass_common_obs(dfr, dfz, localtime='Localtime')
716768
dfr = pass_common_obs(dfz, dfr, localtime='Localtime')
717769

770+
# use time-interval again to filter based on 'Localtime'
771+
# to remove straggling observations outside of specified span
772+
if timeinterval:
773+
dfz = dfz[
774+
(dfz['Localtime'] >= start_date) &
775+
(dfz['Localtime'] <= end_date)
776+
].reset_index(drop=True)
777+
dfr = dfr[
778+
(dfr['Localtime'] >= start_date) &
779+
(dfr['Localtime'] <= end_date)
780+
].reset_index(drop=True)
781+
718782
# drop all lines with nans
719783
dfr.dropna(how='any', inplace=True)
720784
dfz.dropna(how='any', inplace=True)

tools/RAiDER/gnss/types.py

Lines changed: 8 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -1,14 +1,18 @@
11
import argparse
22
from pathlib import Path
3-
from typing import Optional
4-
3+
from typing import List, Optional
54

65
class RAiDERCombineArgs(argparse.Namespace):
76
raider_file: Path
8-
raider_folder: Path
9-
gnss_folder: Path
7+
raider_folder: List[Path]
8+
gnss_folder: List[Path]
109
gnss_file: Optional[Path]
1110
raider_column_name: str
1211
column_name: str
1312
out_name: Path
1413
local_time: Optional[str]
14+
obs_errlimit: float
15+
min_pct_days: float
16+
timeinterval: Optional[str]
17+
allow_nan_for_negative: bool
18+
verbose: bool

0 commit comments

Comments (0)