Skip to content

Commit be77e02

Browse files
authored
Merge pull request #93 from GeoscienceAustralia/NPI-4067-check-sp3-column-alignment
NPI-4067 Improvements to SP3 validation and unit tests
2 parents 6471a83 + 5846860 commit be77e02

File tree

7 files changed

+536
-169
lines changed

7 files changed

+536
-169
lines changed

gnssanalysis/filenames.py

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ def determine_sp3_name_props(
685685
name_props = {}
686686
# First, properties from the SP3 data:
687687
try:
688-
sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, format_check_strictness=strict_mode)
688+
sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, strict_mode=strict_mode)
689689
except Exception as e:
690690
# TODO: Work out what exceptions read_sp3 can actually throw when given a non-SP3 file
691691
if strict_mode == StrictModes.STRICT_RAISE:
@@ -769,26 +769,27 @@ def determine_sp3_name_props(
769769
def determine_properties_from_filename(
770770
filename: str,
771771
expect_long_filenames: bool = False,
772-
reject_long_term_products: bool = False,
772+
reject_long_term_products: bool = True,
773773
strict_mode: type[StrictMode] = StrictModes.STRICT_WARN,
774774
include_compressed_flag: bool = False,
775-
) -> Union[Dict[str, Any], None]:
775+
) -> dict[str, Any]:
776776
"""Determine IGS filename properties based purely on a filename
777777
778778
This function does its best to support both IGS long filenames and old short filenames.
779779
Similar to other name property detection functions, it returns a dictionary containing
780780
the name properties it manages to successfully determine.
781781
782782
:param str filename: filename to examine for naming properties
783-
:param bool expect_long_filenames: expect provided filenames to conform to IGS long product filename
784-
convention (v2.1), and raise / error if they do not.
785-
:param bool reject_long_term_products: raise exception if an IGS Long Term Product is encountered (these have
786-
no timerange / period, and include an end_epoch).
787-
:param type[StrictMode] strict_mode: indicates whether to raise or warn, if filename is clearly not valid / a
788-
format we support.
789-
:param bool include_compressed_flag: include a flag in output, indicating if the filename indicated
790-
compression (.gz)
791-
:return Dict[str, Any]: dictionary containing the extracted name properties
783+
:param bool expect_long_filenames: (off by default for backwards compatibility) expect provided filenames to
784+
conform to IGS long product filename convention (v2.1), and raise / error if they do not.
785+
:param bool reject_long_term_products: (on by default for backwards compatibility) raise warning or exception if
786+
an IGS Long Term Product is encountered (these have no timerange / period, and include an end_epoch).
787+
:param type[StrictMode] strict_mode: indicates whether to raise or warn (default), if filename is clearly
788+
not valid / a format we support.
789+
:param bool include_compressed_flag: (off by default for backwards compatibility) include a flag in output,
790+
indicating if the filename indicated compression (.gz).
791+
:return dict[str, Any]: dictionary containing the extracted name properties. Will be empty on errors, when
792+
strict_mode is set to WARN (default).
792793
:raises ValueError: if filename seems invalid / unsupported, e.g. if it is too long to be a short filename, but
793794
doesn't match long filename regex
794795
"""
@@ -798,7 +799,7 @@ def determine_properties_from_filename(
798799
raise ValueError(f"Filename too long (over 51 chars): '{filename}'")
799800
if strict_mode == StrictModes.STRICT_WARN:
800801
warnings.warn(f"Filename too long (over 51 chars): '{filename}'")
801-
return None
802+
return {}
802803

803804
# Filename isn't too long...
804805
# If we're expecting a long format filename, is it too short?
@@ -807,7 +808,7 @@ def determine_properties_from_filename(
807808
raise ValueError(f"IGS long filename can't be <38 chars: '{filename}'. expect_long_filenames is on")
808809
if strict_mode == StrictModes.STRICT_WARN:
809810
warnings.warn(f"IGS long filename can't be <38 chars: '{filename}'. expect_long_filenames is on")
810-
return None
811+
return {}
811812

812813
match_long = _RE_IGS_LONG_FILENAME.fullmatch(filename)
813814
if match_long is not None:
@@ -853,7 +854,7 @@ def determine_properties_from_filename(
853854
raise ValueError(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
854855
if strict_mode == StrictModes.STRICT_WARN:
855856
warnings.warn(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
856-
return None
857+
return {}
857858

858859
start_epoch = datetime.datetime( # Lacks hour and minute precision in LTP version
859860
year=int(match_long["year"]),
@@ -885,7 +886,7 @@ def determine_properties_from_filename(
885886
raise ValueError(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
886887
if strict_mode == StrictModes.STRICT_WARN:
887888
warnings.warn(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
888-
return None
889+
return {}
889890

890891
# Is it plausibly a short filename?
891892
if len(filename) >= 38:
@@ -894,7 +895,7 @@ def determine_properties_from_filename(
894895
raise ValueError(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
895896
if strict_mode == StrictModes.STRICT_WARN:
896897
warnings.warn(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
897-
return None
898+
return {}
898899

899900
# Try to simplistically parse as short filename as last resort.
900901

@@ -946,7 +947,9 @@ def determine_properties_from_filename(
946947

947948

948949
def check_filename_and_contents_consistency(
949-
input_file: pathlib.Path, ignore_single_epoch_short: bool = True
950+
input_file: pathlib.Path,
951+
ignore_single_epoch_short: bool = True,
952+
output_orphan_prop_names: bool = False,
950953
) -> Mapping[str, tuple[str, str]]:
951954
"""
952955
Checks that the content of the provided file matches what its filename says should be in it.
@@ -961,6 +964,10 @@ def check_filename_and_contents_consistency(
961964
File properties which do not match are returned as a mapping of str -> tuple(str, str), taking the form
962965
property_name > filename_derived_value, file_contents_derived_value
963966
:param Path input_file: Path to the file to be checked.
967+
:param bool ignore_single_epoch_short: (on by default) consider it ok for file content to be one epoch short of
968+
what the filename says.
969+
:param bool output_orphan_prop_names: (off by default) for properties found exclusively in file content or name
970+
(not in both, and therefore not compared), return these as 'prop_name': None.
964971
:return Mapping[str, tuple[str,str]]: Empty map if properties agree (OR None on failure), else map of discrepancies
965972
of property_name > filename_derived_value, file_contents_derived_value.
966973
:raises NotImplementedError: if called with a file type not yet supported.
@@ -987,7 +994,20 @@ def check_filename_and_contents_consistency(
987994
)
988995

989996
discrepancies = {}
990-
for key in file_name_properties.keys():
997+
# Check for keys only present on one side
998+
orphan_keys = set(file_name_properties.keys()).symmetric_difference((set(file_content_properties.keys())))
999+
logging.warning(
1000+
"The following properties can't be compared, as they were extracted only from file content or "
1001+
f"name (not both): {str(orphan_keys)}"
1002+
)
1003+
if output_orphan_prop_names:
1004+
# Output properties found only in content OR filename.
1005+
for orphan_key in orphan_keys:
1006+
discrepancies[orphan_key] = None
1007+
1008+
mutual_keys = set(file_name_properties.keys()).difference(orphan_keys)
1009+
# For keys present in both dicts, compare values.
1010+
for key in mutual_keys:
9911011
if (file_name_val := file_name_properties[key]) != (file_content_val := file_content_properties[key]):
9921012
# If enabled, and epoch interval successfully extracted, ignore cases where the timespan of epochs in the
9931013
# file content, is one epoch shorter than the timespan the filename implies (e.g. 23:55 vs 1D i.e. 24:00).

gnssanalysis/gn_datetime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def j2000_to_igs_dt(j2000_secs: _np.ndarray) -> _np.ndarray:
382382
time_h = _pd.Series((hour - day).astype("int64").astype(str)).str.rjust(3).values
383383
time_m = _pd.Series((minute - hour).astype("int64").astype(str)).str.rjust(3).values
384384
# Width 12 due to one extra leading space (for easier concatenation next), then _0.00000000 format per SP3d spec:
385-
time_s = (_pd.Series((datetime - minute)).view("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values
385+
time_s = (_pd.Series((datetime - minute)).astype("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values
386386
return date_y + date_m + date_d + time_h + time_m + time_s
387387

388388

0 commit comments

Comments
 (0)