diff --git a/gnssanalysis/filenames.py b/gnssanalysis/filenames.py index 81cdc7a..972ebcb 100644 --- a/gnssanalysis/filenames.py +++ b/gnssanalysis/filenames.py @@ -685,7 +685,7 @@ def determine_sp3_name_props( name_props = {} # First, properties from the SP3 data: try: - sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, format_check_strictness=strict_mode) + sp3_df = gn_io.sp3.read_sp3(file_path, nodata_to_nan=False, strict_mode=strict_mode) except Exception as e: # TODO: Work out what exceptions read_sp3 can actually throw when given a non-SP3 file if strict_mode == StrictModes.STRICT_RAISE: @@ -769,10 +769,10 @@ def determine_sp3_name_props( def determine_properties_from_filename( filename: str, expect_long_filenames: bool = False, - reject_long_term_products: bool = False, + reject_long_term_products: bool = True, strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, include_compressed_flag: bool = False, -) -> Union[Dict[str, Any], None]: +) -> dict[str, Any]: """Determine IGS filename properties based purely on a filename This function does its best to support both IGS long filenames and old short filenames. @@ -780,15 +780,16 @@ def determine_properties_from_filename( the name properties it manages to successfully determine. :param str filename: filename to examine for naming properties - :param bool expect_long_filenames: expect provided filenames to conform to IGS long product filename - convention (v2.1), and raise / error if they do not. - :param bool reject_long_term_products: raise exception if an IGS Long Term Product is encountered (these have - no timerange / period, and include an end_epoch). - :param type[StrictMode] strict_mode: indicates whether to raise or warn, if filename is clearly not valid / a - format we support. - :param bool include_compressed_flag: include a flag in output, indicating if the filename indicated - compression (.gz) - :return Dict[str, Any]: dictionary containing the extracted name properties + :param bool expect_long_filenames: (off by default for backwards compatibility) expect provided filenames to + conform to IGS long product filename convention (v2.1), and raise / error if they do not. + :param bool reject_long_term_products: (on by default for backwards compatibility) raise warning or exception if + an IGS Long Term Product is encountered (these have no timerange / period, and include an end_epoch). + :param type[StrictMode] strict_mode: indicates whether to raise or warn (default), if filename is clearly + not valid / a format we support. + :param bool include_compressed_flag: (off by default for backwards compatibility) include a flag in output, + indicating if the filename indicated compression (.gz). + :return dict[str, Any]: dictionary containing the extracted name properties. Will be empty on errors, when + strict_mode is set to WARN (default). :raises ValueError: if filename seems invalid / unsupported, E.g. if it is too long to be a short filename, but doesn't match long filename regex """ @@ -798,7 +799,7 @@ def determine_properties_from_filename( raise ValueError(f"Filename too long (over 51 chars): '{filename}'") if strict_mode == StrictModes.STRICT_WARN: warnings.warn(f"Filename too long (over 51 chars): '{filename}'") - return None + return {} # Filename isn't too long... # If we're expecting a long format filename, is it too short? @@ -807,7 +808,7 @@ def determine_properties_from_filename( raise ValueError(f"IGS long filename can't be <38 chars: '{filename}'. 
expect_long_filenames is on")
         if strict_mode == StrictModes.STRICT_WARN:
             warnings.warn(f"IGS long filename can't be <38 chars: '{filename}'. expect_long_filenames is on")
-        return None
+        return {}

     match_long = _RE_IGS_LONG_FILENAME.fullmatch(filename)
     if match_long is not None:
@@ -853,7 +854,7 @@
                 raise ValueError(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
             if strict_mode == StrictModes.STRICT_WARN:
                 warnings.warn(f"Long Term Product encountered: '{filename}' and reject_long_term_products is on")
-            return None
+            return {}

         start_epoch = datetime.datetime(  # Lacks hour and minute precision in LTP version
             year=int(match_long["year"]),
@@ -885,7 +886,7 @@
             raise ValueError(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
         if strict_mode == StrictModes.STRICT_WARN:
             warnings.warn(f"Expecting an IGS format long product name, but regex didn't match: '{filename}'")
-        return None
+        return {}

     # Is it plausibly a short filename?
     if len(filename) >= 38:
@@ -894,7 +895,7 @@
             raise ValueError(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
         if strict_mode == StrictModes.STRICT_WARN:
             warnings.warn(f"Long filename parse failed, but >=38 chars is too long for 'short': '{filename}'")
-        return None
+        return {}

     # Try to simplistically parse as short filename as last resort.
@@ -946,7 +947,9 @@

 def check_filename_and_contents_consistency(
-    input_file: pathlib.Path, ignore_single_epoch_short: bool = True
+    input_file: pathlib.Path,
+    ignore_single_epoch_short: bool = True,
+    output_orphan_prop_names: bool = False,
 ) -> Mapping[str, tuple[str, str]]:
     """
     Checks that the content of the provided file matches what its filename says should be in it.
@@ -961,6 +964,10 @@
     File properties which do not match are returned as a mapping of str -> tuple(str, str), taking the form
     property_name > filename_derived_value, file_contents_derived_value
     :param Path input_file: Path to the file to be checked.
+    :param bool ignore_single_epoch_short: (on by default) consider it ok for file content to be one epoch short of
+        what the filename says.
+    :param bool output_orphan_prop_names: (off by default) for properties found exclusively in file content or name
+        (not in both, and therefore not compared), return these as 'prop_name': None.
     :return Mapping[str, tuple[str,str]]: Empty map if properties agree, else map of discrepancies, OR None on failure.
        of property_name > filename_derived_value, file_contents_derived_value.
     :raises NotImplementedError: if called with a file type not yet supported.
@@ -987,7 +994,21 @@
     )

     discrepancies = {}
-    for key in file_name_properties.keys():
+    # Check for keys only present on one side
+    orphan_keys = set(file_name_properties.keys()).symmetric_difference(set(file_content_properties.keys()))
+    if orphan_keys:  # Avoid logging a warning when there is nothing to report
+        logging.warning(
+            "The following properties can't be compared, as they were extracted only from file content or "
+            f"name (not both): {orphan_keys}"
+        )
+    if output_orphan_prop_names:
+        # Output properties found only in content OR filename.
+        for orphan_key in orphan_keys:
+            discrepancies[orphan_key] = None
+
+    mutual_keys = set(file_name_properties.keys()).difference(orphan_keys)
+    # For keys present in both dicts, compare values.
+ for key in mutual_keys: if (file_name_val := file_name_properties[key]) != (file_content_val := file_content_properties[key]): # If enabled, and epoch interval successfully extracted, ignore cases where the timespan of epochs in the # file content, is one epoch shorter than the timespan the filename implies (e.g. 23:55 vs 1D i.e. 24:00). diff --git a/gnssanalysis/gn_datetime.py b/gnssanalysis/gn_datetime.py index 8b68b91..55bd762 100644 --- a/gnssanalysis/gn_datetime.py +++ b/gnssanalysis/gn_datetime.py @@ -382,7 +382,7 @@ def j2000_to_igs_dt(j2000_secs: _np.ndarray) -> _np.ndarray: time_h = _pd.Series((hour - day).astype("int64").astype(str)).str.rjust(3).values time_m = _pd.Series((minute - hour).astype("int64").astype(str)).str.rjust(3).values # Width 12 due to one extra leading space (for easier concatenation next), then _0.00000000 format per SP3d spec: - time_s = (_pd.Series((datetime - minute)).view("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values + time_s = (_pd.Series((datetime - minute)).astype("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values return date_y + date_m + date_d + time_h + time_m + time_s diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 769826b..6487435 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -5,6 +5,7 @@ import re as _re from typing import Callable, Literal, Mapping, Optional, Union, List, Tuple, overload from pathlib import Path +import warnings import numpy as _np import pandas as _pd @@ -66,6 +67,18 @@ # Max width of SP3 lines _SP3_MAX_WIDTH: int = 80 +_SP3_DATA_LINE_WIDTH: int = 80 +# TODO note that our current parsing in _split_sp3_content() removes the '*' from epoch headers, making them 30 chars +# not 31 +_SP3_EPOCH_HEADER_WIDTH: int = 31 + +# Columns in SP3 we expect (by SP3 d spec) to be unused (contain a space). +# Deviation from this can be used to detect column misalignment +_SP3_UNUSED_COLUMN_INDEXES_EPOCH_HEADER: list[int] = [3, 8, 11, 14, 17, 20] +_SP3_UNUSED_COLUMN_INDEXES_POS_CLK: list[int] = [61, 64, 67, 70, 74, 77, 78] +_SP3_UNUSED_COLUMN_INDEXES_VELOCITY: list[int] = [61, 64, 67, 70, 74, 75, 76, 77, 78, 79, 80] +_SP3_UNUSED_COLUMN_INDEXES_EP: list[int] = [3, 4, 9, 14, 19, 27, 36, 45, 54, 63, 72] +_SP3_UNUSED_COLUMN_INDEXES_EV: list[int] = [3, 4, 9, 14, 19, 27, 36, 45, 54, 63, 72] _SP3_DEF_PV_WIDTH = [1, 3, 14, 14, 14, 14, 1, 2, 1, 2, 1, 2, 1, 3, 1, 1, 1, 2, 1, 1] _SP3_DEF_PV_NAME = [ @@ -298,7 +311,8 @@ def reflow_string_as_lines_for_comment_block( def get_sp3_comments(sp3_df: _pd.DataFrame) -> list[str]: """ - Utility function to retrieve stored SP3 comment lines from the attributes of an SP3 DataFrame. + Utility function to retrieve stored SP3 comment lines from the attributes of an SP3 DataFrame. NOTE: this does not + validate comment lines for compliance with SP3 d spec. :return list[str]: List of comment lines, verbatim. Note that comment line lead-in of '/* ' is not removed. 
""" return sp3_df.attrs["COMMENTS"] @@ -493,11 +507,109 @@ def mapparm(old: Tuple[float, float], new: Tuple[float, float]) -> Tuple[float, return offset, scale_factor +def _check_column_alignment_of_sp3_block( + date: str, + data: str, + strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, + ignore_short_data_lines: bool = True, +) -> None: + """ + Check an individual SP3 block (one epoch), for line length and column alignment + :param str date: Epoch start date/time, typically beginning with '*' + :param str data: Entries for the given epoch (multiple rows, not yet split on '\n') + :param type[StrictMode] strict_mode: Determines response to issues found. Ignore or don't run check / warning / + raise exception. Defaults to WARN. + :param bool ignore_short_data_lines: don't fail the check due to data lines which are < 80 chars, + even though this is technically not to spec under SP3 version d. Defaults to True. + :raises ValueError: if validation doesn't pass, and strict_mode is set to STRICT_RAISE. + """ + # NOTE: we currently only run these checks on individual SP3 blocks, not the entire file. + + # Check epoch header (date) and data lines (P/V/EP/EV) are the right length, and that all unused columns (by SP3d + # spec) are in fact blank (contain spaces). If this is not true, it probably indicates column misalignment which + # will lead to incorrect parsing. + + # First check epoch header line + + if len(date) not in (_SP3_EPOCH_HEADER_WIDTH, _SP3_EPOCH_HEADER_WIDTH - 1): + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError( + f"Epoch header should be {_SP3_EPOCH_HEADER_WIDTH} chars long, but was {len(date)}: '{date}'" + ) + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning( + f"Epoch header should be {_SP3_EPOCH_HEADER_WIDTH} chars long, but was {len(date)}: '{date}'" + ) + epoch_header_offset = 0 # TODO remove this after fixing our block splitting logic to not remove the '*' + if len(date) == _SP3_EPOCH_HEADER_WIDTH - 1: # Cut short by our block splitting logic. Adjust indexes accordingly. + epoch_header_offset = -1 + # Check for polluted unused columns (indicating misalignment) + for col_num in _SP3_UNUSED_COLUMN_INDEXES_EPOCH_HEADER: + if date[(col_num + epoch_header_offset) - 1] != " ": # Unused column (accoding to spec) was not blank + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError(f"Misaligned epoch header line (unused column didn't contain a space): '{date}'") + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning(f"Misaligned epoch header line (unused column didn't contain a space): '{date}'") + + # Now check each data line for this epoch + + for line in data.splitlines(): + line_length = len(line) + if line_length == 0: + continue # Skip completely empty lines + + if line_length > _SP3_DATA_LINE_WIDTH: # Fatal, regardless of strict mode setting. + raise ValueError(f"Overlong data line ({line_length} chars long): '{line}'") + + elif line_length < _SP3_DATA_LINE_WIDTH: # Not compliant, but likely not serious + if not ignore_short_data_lines: + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError( + f"Data lines should be {_SP3_DATA_LINE_WIDTH} chars. Got one {line_length} chars long: '{line}'" + ) + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning( + f"Data lines should be {_SP3_DATA_LINE_WIDTH} chars. Got one {line_length} chars long: '{line}'" + ) + + # Line length is to spec (or short & we're ignoring that). + + # What record type is this line (POS, VEL, EP, or EV record)? 
+ if line[0] == "P": + unused_column_indexes = _SP3_UNUSED_COLUMN_INDEXES_POS_CLK + elif line[0] == "V": + unused_column_indexes = _SP3_UNUSED_COLUMN_INDEXES_VELOCITY + elif line[:2] == "EP": + unused_column_indexes = _SP3_UNUSED_COLUMN_INDEXES_EP + elif line[:2] == "EV": + unused_column_indexes = _SP3_UNUSED_COLUMN_INDEXES_EV + else: + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError(f"Data line should start with P/V/EP/EV. First two chars were: '{line[:2]}'") + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning(f"Data line should start with P/V/EP/EV. First two chars were: '{line[:2]}'") + # Can't check column alignment for this line as we don't know which record type it is. + continue + + # For each 'unused' column we expect in a line of the determined record type, check column is actually empty. + for col_num in unused_column_indexes: + # Index out of range due to (non-compliant) short line. Skip testing further columns of this line. + if col_num > (line_length - 1): + break + if line[col_num - 1] != " ": # 'Unused' column wasn't empty! + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError(f"Misaligned data line (unused column did not contain a space): '{line}'") + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning(f"Misaligned data line (unused column did not contain a space): '{line}'") + + def _process_sp3_block( date: str, data: str, widths: List[int] = _SP3_DEF_PV_WIDTH, names: List[str] = _SP3_DEF_PV_NAME, + strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, + ignore_short_data_lines: bool = True, ) -> _pd.DataFrame: """Process a single block of SP3 data. NOTE: this process creates a temporary DataFrame for *every epoch* of SP3 data read in, complete with indexes, etc. @@ -509,10 +621,18 @@ def _process_sp3_block( :param str data: The SP3 data block. :param List[int] widths: The widths of the columns in the SP3 data block. :param List[str] names: The names of the columns in the SP3 data block. + :param type[StrictMode] strict_mode: (default: WARN) level of strictness with which to check for SP3d + format compliance. StrictModes.STRICT_RAISE will raise an exception if a format issue is detected (except + if ignore_short_data_lines is enabled). Set to StrictModes.STRICT_OFF to neither warn nor raise. + :param bool ignore_short_data_lines: (default: True) when checking SP3d format compliance, don't warn/raise + about lines which are too short (eg 60 chars instead of 80). :return _pd.DataFrame: The processed SP3 data as a DataFrame. """ - if not data or len(data) == 0: + if not data or len(data) == 0: # No data in this epoch return _pd.DataFrame() + + _check_column_alignment_of_sp3_block(date, data, strict_mode, ignore_short_data_lines) + epochs_dt = _pd.to_datetime(_pd.Series(date).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S") # NOTE: setting dtype_backend="pyarrow" currently breaks parsing. temp_sp3 = _pd.read_fwf(_io.StringIO(data), widths=widths, names=names) @@ -562,7 +682,7 @@ def check_epoch_counts_for_discrepancies( draft_sp3_df: _pd.DataFrame, parsed_sp3_header: _pd.Series, sp3_path_or_bytes: Union[Path, str, bytes, None] = None, - continue_on_error: bool = True, + strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, ): """ Utility function for use during SP3 parsing. Checks for discrepancies in the number of epochs the SP3 header @@ -576,6 +696,7 @@ def check_epoch_counts_for_discrepancies( later in the SP3 reading process. 
:param Union[Path, str, bytes, None] sp3_path_or_bytes: representation of the source SP3 file path or binary data, used to determine whether a filename can be found, and extract it if so. + :param type[StrictMode] strict_mode: (Default: WARN) indicates whether to raise, warn, or ignore issues found. :raises ValueError: if discrepancies found in number of epochs indicated by SP3 filename/header/contents """ sp3_filename: Union[str, None] = None @@ -589,15 +710,16 @@ def check_epoch_counts_for_discrepancies( header_epoch_count = int(parsed_sp3_header.HEAD.N_EPOCHS) if content_unique_epoch_count != header_epoch_count: - if not continue_on_error: + if strict_mode == StrictModes.STRICT_RAISE: raise ValueError( f"Header says there should be {header_epoch_count} epochs, however there are " f"{content_unique_epoch_count} (unique) epochs in the content (duplicate epoch check comes later)." ) - logger.warning( - f"WARNING: Header says there should be {header_epoch_count} epochs, however there are " - f"{content_unique_epoch_count} (unique) epochs in the content (duplicate epoch check comes later)." - ) + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning( + f"Header says there should be {header_epoch_count} epochs, however there are " + f"{content_unique_epoch_count} (unique) epochs in the content (duplicate epoch check comes later)." + ) if sp3_filename is None or len(sp3_filename) == 0: logger.info("SP3 filename not available to check for epoch count discrepancies, continuing") @@ -608,41 +730,45 @@ def check_epoch_counts_for_discrepancies( filename_derived_epoch_count: Union[int, None] = None # Try extracting properties from filename - try: - filename_props: dict = filenames.determine_properties_from_filename(sp3_filename) - filename_timespan_timedelta = filename_props.get("timespan") - if not isinstance(filename_timespan_timedelta, timedelta): - raise KeyError(f"Failed to get timespan from filename '{sp3_filename}'") - - filename_sample_rate = filename_props.get("sampling_rate") - if not isinstance(filename_sample_rate, str): - raise KeyError(f"Failed to get sampling_rate from filename '{sp3_filename}'") - - filename_sample_rate_timedelta = filenames.convert_nominal_span(filename_sample_rate) - filename_derived_epoch_count = int( - timedelta( - seconds=filename_timespan_timedelta.total_seconds() / filename_sample_rate_timedelta.total_seconds() - ).total_seconds() - ) - except Exception as e: - logger.warning("Failed to extract filename properties to validate against header / contents: ", e) + filename_props: dict = filenames.determine_properties_from_filename(sp3_filename) + filename_timespan_timedelta = filename_props.get("timespan") + if not isinstance(filename_timespan_timedelta, timedelta): + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError(f"Failed to get timespan from filename '{sp3_filename}'") + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning(f"Failed to get timespan from filename '{sp3_filename}'") return + filename_sample_rate = filename_props.get("sampling_rate") + if not isinstance(filename_sample_rate, str): + if strict_mode == StrictModes.STRICT_RAISE: + raise ValueError(f"Failed to get sampling_rate from filename '{sp3_filename}'") + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning(f"Failed to get sampling_rate from filename '{sp3_filename}'") + return + + filename_sample_rate_timedelta = filenames.convert_nominal_span(filename_sample_rate) + filename_derived_epoch_count = int( + timedelta( + 
seconds=filename_timespan_timedelta.total_seconds() / filename_sample_rate_timedelta.total_seconds() + ).total_seconds() + ) + # Now for the actual checks. # Check if the header states a number of epochs equal to (or one less than) the filename period implies # Note filename is allowed to indicate a period one epoch longer than the data. E.g. 01D can end at 23:55 # if in 5 min epochs. if header_epoch_count not in (filename_derived_epoch_count, filename_derived_epoch_count - 1): - if not continue_on_error: + if strict_mode == StrictModes.STRICT_RAISE: raise ValueError( f"Header says there should be {header_epoch_count} epochs, however filename '{sp3_filename}' implies " f"there should be {filename_derived_epoch_count} (or {filename_derived_epoch_count-1} at minimum)." ) - logger.warning( - f"WARNING: Header says there should be {header_epoch_count} epochs, however filename '{sp3_filename}' implies " - f"there should be {filename_derived_epoch_count} (or {filename_derived_epoch_count-1} at minimum)." - ) - # All good, validation passed. + elif strict_mode == StrictModes.STRICT_WARN: + logger.warning( + f"Header says there should be {header_epoch_count} epochs, however filename '{sp3_filename}' implies " + f"there should be {filename_derived_epoch_count} (or {filename_derived_epoch_count-1} at minimum)." + ) def check_sp3_version(sp3_bytes: bytes, strict_mode: type[StrictMode] = StrictModes.STRICT_WARN) -> bool: @@ -675,7 +801,7 @@ def check_sp3_version(sp3_bytes: bytes, strict_mode: type[StrictMode] = StrictMo raise ValueError( f"Support for SP3 file version '{version_char_as_string}' is untested. Refusing to read as strict mode is on." ) - if strict_mode == StrictModes.STRICT_WARN: + elif strict_mode == StrictModes.STRICT_WARN: logger.warning( f"Reading an older SP3 file version '{version_char_as_string}'. This may not parse correctly!" ) @@ -744,7 +870,7 @@ def validate_sp3_comment_lines( raise ValueError( f"SP3 files must have at least 4 comment lines! File is {short_by_lines} short of that" ) - if strict_mode == StrictModes.STRICT_WARN: + elif strict_mode == StrictModes.STRICT_WARN: logger.warning(f"SP3 files must have at least 4 comment lines! File is {short_by_lines} short of that") if attempt_fixes: @@ -761,7 +887,7 @@ def validate_sp3_comment_lines( if (not attempt_fixes) or fail_on_fixed_issues: if strict_mode == StrictModes.STRICT_RAISE: raise ValueError(f"SP3 comments must begin with '/* ' (note space). Line: '{sp3_comment_lines[i]}'") - if strict_mode == StrictModes.STRICT_WARN: + elif strict_mode == StrictModes.STRICT_WARN: logger.warning(f"SP3 comments must begin with '/* ' (note space). Line: '{sp3_comment_lines[i]}'") if attempt_fixes: @@ -782,7 +908,7 @@ def validate_sp3_comment_lines( "SP3 comment lines must not exceed 80 chars (including lead-in). " f"Line (length {len(sp3_comment_lines[i])}): '{sp3_comment_lines[i]}'" ) - if strict_mode == StrictModes.STRICT_WARN: + elif strict_mode == StrictModes.STRICT_WARN: logger.warning( "SP3 comment lines must not exceed 80 chars (including lead-in). 
" f"Line (length {len(sp3_comment_lines[i])}): '{sp3_comment_lines[i]}'" @@ -801,12 +927,15 @@ def read_sp3( pOnly: bool = True, nodata_to_nan: bool = True, drop_offline_sats: bool = False, - continue_on_ep_ev_encountered: bool = True, - check_header_vs_filename_vs_content_discrepancies: bool = False, - # The following two apply when the above is enabled: + strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, + # Optionally override above strictness for specific checks: + strictness_content_discrepancy: Optional[type[StrictMode]] = None, + strictness_duped_epochs: Optional[type[StrictMode]] = StrictModes.STRICT_WARN, + strictness_comments: Optional[type[StrictMode]] = None, + # Selectively turn off parts of strict mode (and don't raise exceptions in RAISE mode if these checks fail): skip_filename_in_discrepancy_check: bool = False, - continue_on_discrepancies: bool = False, - format_check_strictness: type[StrictMode] = StrictModes.STRICT_WARN, + skip_short_line_check: bool = True, + skip_version_check: bool = False, ) -> _pd.DataFrame: """Reads an SP3 file and returns the data as a pandas DataFrame. @@ -816,27 +945,34 @@ def read_sp3( and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True. :param bool drop_offline_sats: If True, drops satellites from the DataFrame if they have ANY missing (nodata) values in the SP3 POS column. - :param bool continue_on_ep_ev_encountered: If True, logs a warning and continues if EV or EP rows are found in - the input SP3. These are currently unsupported by this function and will be ignored. Set to false to - raise a NotImplementedError instead. - :param bool check_header_vs_filename_vs_content_discrepancies: enable discrepancy checks on SP3 content vs - header vs filename. - :param bool skip_filename_in_discrepancy_check: If discrepancy checks enabled (see above), this allows skipping - the filename part of the checks, even if filename is available. - :param bool continue_on_discrepancies: (Only applicable with check_header_vs_filename_vs_content_discrepancies) - If True, logs a warning and continues if major discrepancies are detected between the SP3 content, SP3 header, - and SP3 filename (if available). Set to false to raise a ValueError instead. - :param type[StrictMode] format_check_strictness: (work in progress) defines the response to things that are not - quite to SP3 spec: whether to ignore, warn, or raise. Default: warn. + :param type[StrictMode] strict_mode: defines the response to things that are not quite to SP3 spec, or that we do + not fully support yet. Options are STRICT_RAISE, STRICT_WARN, or STRICT_OFF. Default: STRICT_WARN. Current functionality influenced by this includes: - - trying to read an SP3 version b or c file (not officially supported) + - trying to read an SP3 version b or c file (not officially supported). - SP3 comment specification voilations including < 4 comment lines, overlong comment lines, and incorrect start sequence i.e. line didn't begin exactly '/* '. + - inconsistency between SP3 filename, header, and content. + - SP3 lines too short (too long is always considered fatal), or misaligned (unused columns != ' ') In future it could also impact things like: - less than min number of SV entries This parameter could be renamed to enforce_strict_format_compliance once more extensive checks are added. + :param type[StrictMode] | None strictness_content_discrepancy: Optional override for strictness setting applied to + filename vs header vs content discrepancy checks. 
If not set strict_mode will be used. Regardless, + skip_filename_in_discrepancy_check will apply. + :param type[StrictMode] | None strictness_duped_epochs: (default: WARN and dedupe duplicate epochs) optional + override for how to handle duplicate epochs. + :param type[StrictMode] | None strictness_comments: (default: None) allows setting comment handling strictness + separately to overall strict_mode. + + *** OPTIONS to selectively turn off parts of strict mode, regardless of above settings ***: + :param bool skip_filename_in_discrepancy_check: (default False) in strict_mode WARN or RAISE, if an SP3 filename is + available, it is checked for consistency with header and content. Turn this ON to skip that check. + :param bool skip_short_line_check: (default True) regardless of strict_mode above, indicates whether to + skip raising / warning about lines which are too short according to the SP3d spec. + :param bool skip_version_check: (default False) for testing purposes, optionally turn off the SP3 version check. :return _pd.DataFrame: The SP3 data as a DataFrame. :raises FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist. + :raises ValueError: For various errors extracting expected data :raises Exception: For other errors reading SP3 file/bytes :note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data. @@ -850,7 +986,8 @@ def read_sp3( # Extract and check version. Raises exception for completely unsupported versions. # For version b and c behaviour depends on strict_mode setting - check_sp3_version(sp3_bytes=content, strict_mode=format_check_strictness) + if not skip_version_check: + check_sp3_version(sp3_bytes=content, strict_mode=strict_mode) # NOTE: Judging by the spec for SP3-d (2016), there should only be 2 '%i' lines in the file, and they should be # immediately followed by the mandatory 4+ comment lines. @@ -862,25 +999,32 @@ def read_sp3( # These will be written to DataFrame.attrs["COMMENTS"] for easy access (but please use get_sp3_comments()) comment_lines: list[str] = [line.decode("utf-8", errors="ignore").rstrip("\n") for line in comment_lines_bytes] # Validate comment lines (but don't make changes) - validate_sp3_comment_lines(comment_lines, strict_mode=format_check_strictness) + comment_strictness = strictness_comments if strictness_comments is not None else strict_mode + validate_sp3_comment_lines(comment_lines, strict_mode=comment_strictness) # NOTE: The comment lines should be contiguous (not fragmented throughout the file), and they should be immediately # followed by the first Epoch Header Record. # Note: this interpretation is based on page 16 of the SP3d spec, which says 'The comment lines should be read in # until the first Epoch Header Record (i.e. the first time tag line) is encountered.' # For robustness we strip comments THROUGHOUT the data before continuing parsing. - # Check no (non-comment) line is overlong (>80 chars not counting \n) - sp3_lines: List[str] = content.decode("utf-8", errors="ignore").split("\n") - overlong_lines_found: int = 0 - for line in sp3_lines: - if len(line) > _SP3_MAX_WIDTH: - overlong_lines_found += 1 - logger.error(f"Line of SP3 input exceeded max width: '{line}'") - - if overlong_lines_found > 0: - raise ValueError( - f"{overlong_lines_found} SP3 epoch data lines were overlong and very likely to parse incorrectly." 
-    )
+    if strict_mode != StrictModes.STRICT_OFF:
+        # Check no (non-comment) line is overlong (>80 chars not counting \n)
+        sp3_lines: List[str] = content.decode("utf-8", errors="ignore").split("\n")
+        overlong_lines_found: int = 0
+        for line in sp3_lines:
+            if len(line) > _SP3_MAX_WIDTH:
+                overlong_lines_found += 1
+                if overlong_lines_found <= 5:  # Avoid printing out the whole file
+                    warnings.warn(f"Line of SP3 input exceeded max width: '{line}'")
+
+        if overlong_lines_found > 0:
+            if strict_mode == StrictModes.STRICT_RAISE:
+                raise ValueError(
+                    f"{overlong_lines_found} SP3 epoch data lines were overlong and very likely to parse incorrectly."
+                )
+            warnings.warn(
+                f"{overlong_lines_found} SP3 epoch data lines were overlong and very likely to parse incorrectly."
+            )

     # NOTE: We just stripped all comment lines from the input data, so the %i records are now the last thing in the
     # header before the first Epoch Header Record.
@@ -894,15 +1038,24 @@ def read_sp3(
     fline = header[fline_b : fline_b + 24].strip().split(b" ")
     base_xyzc = _np.asarray([float(fline[0])] * 3 + [float(fline[1])])  # exponent base
     date_lines, data_blocks = _split_sp3_content(content)
-    sp3_df = _pd.concat([_process_sp3_block(date, data) for date, data in zip(date_lines, data_blocks)])
+    sp3_df = _pd.concat(
+        [
+            _process_sp3_block(date, data, strict_mode=strict_mode, ignore_short_data_lines=skip_short_line_check)
+            for date, data in zip(date_lines, data_blocks)
+        ]
+    )
     sp3_df = _reformat_df(sp3_df)

     # P/V/EP/EV flag handling is currently incomplete. The current implementation truncates to the first letter,
     # so it can't parse or differentiate between EP and EV!
     if "E" in sp3_df.index.get_level_values("PV_FLAG").unique():
-        if not continue_on_ep_ev_encountered:
+        if strict_mode == StrictModes.STRICT_RAISE:
             raise NotImplementedError("EP and EV flag rows are currently not supported")
-        logger.warning("EP / EV flag rows encountered. These are not yet supported. Dropping them from DataFrame...")
+        elif strict_mode == StrictModes.STRICT_WARN:
+            logger.warning(
+                "EP / EV flag rows encountered. These are not yet supported. Dropping them from DataFrame. "
+                "Switch to strict mode RAISE to raise an exception instead."
+            )
         # Filter out EV / EP records, before we trip ourselves up with them. Technically this is redundant as in the
         # next section we extract P and V records, then drop the PV_FLAG level.
         sp3_df = sp3_df.loc[sp3_df.index.get_level_values("PV_FLAG") != "E"]
@@ -923,8 +1076,8 @@ def read_sp3(
             f"Unique PV flag values seen: {pv_flag_values}"
         )

-    position_df = sp3_df.xs("P", level="PV_FLAG")
-    velocity_df = sp3_df.xs("V", level="PV_FLAG")
+    position_df = sp3_df.xs("P", level="PV_FLAG").sort_index()
+    velocity_df = sp3_df.xs("V", level="PV_FLAG").sort_index()

     # NOTE: care must now be taken to ensure this split and merge operation does not duplicate the FLAGS columns!

@@ -934,7 +1087,7 @@ def read_sp3(
     # not drop all the data to which the column previously applied!)
     # We drop from pos rather than vel, because vel is on the right hand side, so the layout resembles the
     # layout of an SP3 file better. Functionally, this shouldn't make a difference.
-    position_df = position_df.drop(axis=1, columns="FLAGS")
+    position_df = position_df.drop(axis=1, columns="FLAGS", level=0)  # TODO double check this level is right
     velocity_df.columns = SP3_VELOCITY_COLUMNS

     # NOTE from the docs: pandas.concat copies attrs only if all input datasets have the same attrs.
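
The reworked read_sp3() signature above collapses the old boolean knobs (continue_on_ep_ev_encountered, check_header_vs_filename_vs_content_discrepancies, continue_on_discrepancies, format_check_strictness) into a single strict_mode with optional per-check overrides. A minimal usage sketch of the resulting API follows; the input filename is hypothetical, and the StrictModes import location is an assumption (the tests in this PR import STRICT_RAISE / STRICT_OFF, but their source module isn't shown in this diff):

    from gnssanalysis.gn_io import sp3
    from gnssanalysis.gn_utils import StrictModes  # assumption: actual module for StrictModes may differ

    # Fail hard on anything off-spec, except comment formatting, which should only warn:
    sp3_df = sp3.read_sp3(
        "EXAMPLE_ORB.SP3",  # hypothetical input file
        pOnly=True,
        strict_mode=StrictModes.STRICT_RAISE,
        strictness_comments=StrictModes.STRICT_WARN,  # per-check override takes precedence over strict_mode
    )
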
@@ -961,21 +1114,26 @@
     # Convert 999999* (which indicates nodata in the SP3 CLK column) to NaN
     sp3_clock_nodata_to_nan(sp3_df)

+    # Use override strictness setting if set, otherwise use general strictness setting.
+    discrepancy_strictness = (
+        strictness_content_discrepancy if strictness_content_discrepancy is not None else strict_mode
+    )
     # Are we running discrepancy checks?
-    if check_header_vs_filename_vs_content_discrepancies:
+    if discrepancy_strictness != StrictModes.STRICT_OFF:
         # SV count discrepancy check
         content_sv_count = get_unique_svs(sp3_df).size
         # count() gives total of non-NA/null (vs shape, which gets dims of whole structure):
         header_sv_count = sp3_df.attrs["HEADER"].SV_INFO.count()
         if header_sv_count != content_sv_count:
-            if not continue_on_discrepancies:
+            if discrepancy_strictness == StrictModes.STRICT_RAISE:
                 raise ValueError(
                     f"Number of SVs in SP3 header ({header_sv_count}) did not match file contents ({content_sv_count})!"
                 )
-            logger.warning(
-                f"Number of SVs in SP3 header ({header_sv_count}) did not match file contents ({content_sv_count})!"
-            )
+            if discrepancy_strictness == StrictModes.STRICT_WARN:
+                logger.warning(
+                    f"Number of SVs in SP3 header ({header_sv_count}) did not match file contents ({content_sv_count})!"
+                )

     # Epoch count discrepancy check

@@ -985,19 +1143,35 @@
         draft_sp3_df=sp3_df,
         parsed_sp3_header=parsed_header,
         sp3_path_or_bytes=path_bytes_to_pass,
-        continue_on_error=continue_on_discrepancies,
+        strict_mode=discrepancy_strictness,
     )

-    # Check for duplicate epochs, dedupe and log warning
+    strictness_dupes = strictness_duped_epochs if strictness_duped_epochs is not None else strict_mode
+    # NOTE: duplicates break things, so we still remove them even in STRICT_OFF mode
+    # Check for duplicate epochs, dedupe and (depending on config) silence, log warning, or raise
     if sp3_df.index.has_duplicates:  # a literally free check
         # This typically runs in sub ms time. Marks all but first instance as duped:
         duplicated_indexes = sp3_df.index.duplicated()
         first_dupe = sp3_df.index.get_level_values(0)[duplicated_indexes][0]
-        logging.warning(
-            f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
-            f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
-            f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first."
-        )
+        if strictness_dupes == StrictModes.STRICT_RAISE:
+            raise ValueError(
+                f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
+                f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
+                f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'."
+            )
+        elif strictness_dupes == StrictModes.STRICT_WARN:
+            logger.warning(
+                f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
+                f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
+                f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first."
+            )
+        else:
+            logger.info(
+                f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
+                f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
+                f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. 
Duplicates will be " + "removed, keeping first. NOTE: Not logged as a warning as dupe / global strictness was STRICT_OFF" + ) # Now dedupe them, keeping the first of any clashes: sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] @@ -1039,7 +1213,7 @@ def _split_sp3_content(content: bytes) -> Tuple[List[str], _np.ndarray]: """ pattern = _re.compile(r"^\*(.+)$", _re.MULTILINE) blocks = pattern.split(content[: content.rfind(b"EOF")].decode()) - date_lines = blocks[1::2] + date_lines = blocks[1::2] # TODO this seems to be leaving out the "*" that it splits on data_blocks = _np.asarray(blocks[2::2]) return date_lines, data_blocks @@ -1275,7 +1449,10 @@ def get_unique_epochs(sp3_df: _pd.DataFrame) -> _pd.Index: def gen_sp3_header( - sp3_df: _pd.DataFrame, output_comments: bool = False, strict_mode: type[StrictMode] = StrictModes.STRICT_RAISE + sp3_df: _pd.DataFrame, + output_comments: bool = False, + strict_mode: type[StrictMode] = StrictModes.STRICT_RAISE, + strictness_comments: Optional[type[StrictMode]] = None, ) -> str: """ Generate the header for an SP3 file based on the given DataFrame. @@ -1286,6 +1463,8 @@ def gen_sp3_header( :param bool output_comment: Write the SP3 comment lines stored with the DataFrame, into the output. Off by default. :param type[StrictMode] strict_mode: Level of strictness with which to enforce SP3 specification rules (e.g. comments must have a leading space). Options defined by StrictModes, default STRICT_RAISE. + :param type[StrictMode] | None strictness_comments: Optional override on the level of strictness to apply to SP3 + comment checks. If not set, strict_mode is used. :return str: The generated SP3 header as a string. """ if output_comments and not "COMMENTS" in sp3_df.attrs: @@ -1399,10 +1578,12 @@ def gen_sp3_header( if output_comments: # Use actual comments from the DataFrame, not placeholders sp3_comment_lines = get_sp3_comments(sp3_df) + + comment_strictness = strictness_comments if strictness_comments is not None else strict_mode # Inspect incoming comments for validity, but don't change them. - if (strict_mode != StrictModes.STRICT_OFF) and not validate_sp3_comment_lines( + if (comment_strictness != StrictModes.STRICT_OFF) and not validate_sp3_comment_lines( sp3_comment_lines, - strict_mode=strict_mode, + strict_mode=comment_strictness, attempt_fixes=False, fail_on_fixed_issues=True, ): @@ -1710,15 +1891,18 @@ def sp3merge( sp3paths: List[str], clkpaths: Union[List[str], None] = None, nodata_to_nan: bool = False, + strict_mode: type[StrictMode] = StrictModes.STRICT_WARN, ) -> _pd.DataFrame: """Reads in a list of sp3 files and optional list of clk files and merges them into a single sp3 file. :param List[str] sp3paths: The list of paths to the sp3 files. :param Union[List[str], None] clkpaths: The list of paths to the clk files, or None if no clk files are provided. :param bool nodata_to_nan: Flag indicating whether to convert nodata values to NaN. + :param type[StrictMode] strict_mode: (default: WARN) Strictness with which to check the SP3 files read in, for + compliance with the SP3 d spec. :return _pd.DataFrame: The merged SP3 DataFrame. 
""" - sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan) for sp3_file in sp3paths] + sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan, strict_mode=strict_mode) for sp3_file in sp3paths] # Create a new attrs dictionary to be used for the output DataFrame merged_attrs = merge_attrs(sp3_dfs) # If attrs of two DataFrames are different, pd.concat will fail - set them to empty dict instead diff --git a/tests/test_clk.py b/tests/test_clk.py index 100949a..58dcc5a 100644 --- a/tests/test_clk.py +++ b/tests/test_clk.py @@ -34,7 +34,10 @@ def test_clk_read(self): self.assertEqual(clk_df_igs["EST"].iloc[-1], -0.0006105557076344, msg="Check last datapoint is correct") self.assertEqual(clk_df_gfz["EST"].iloc[-1], -0.000610553573006, msg="Check last datapoint is correct") - def test_compare_clk(self): + def test_diff_clk(self): + """ + Note this also tests the now deprecated version, compare_clk() + """ self.fs.reset() # Reset pyfakefs to delete any files which may have persisted from a previous test file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"] self.fs.create_file(file_paths[0], contents=input_data_igs) diff --git a/tests/test_datasets/sp3_test_data.py b/tests/test_datasets/sp3_test_data.py index 4f7c564..71cd6d7 100644 --- a/tests/test_datasets/sp3_test_data.py +++ b/tests/test_datasets/sp3_test_data.py @@ -527,4 +527,118 @@ EOF """ +# Modified example of erroneous Ginan SP3 file, showing column misalignment, but trimmed to stay within width limit +# (so excessive line length alone can't be used as a detection mechanism here). +sp3_test_data_misaligned_columns = b"""#dP2025 6 17 6 0 0.00000000 2 ORBIT IGS14 FIT GAA +## 2371 194400.00000000 300.00000000 60843 0.0000000000000 ++ 30 G01G02G03G04G05G06G07G08G09G10G11G12G13G14G15G16G17 ++ G18G19G20G22G23G24G25G27G28G29G30G31G32 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +++ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +%c G cc GPS ccc cccc cccc cccc cccc ccccc ccccc ccccc ccccc +%c cc cc ccc ccc cccc cccc cccc cccc ccccc ccccc ccccc ccccc +%f 1.2500000 1.025000000 0.00000000000 0.000000000000000 +%f 0.0000000 0.000000000 0.00000000000 0.000000000000000 +%i 0 0 0 0 0 0 0 0 0 +%i 0 0 0 0 0 0 0 0 0 +/* Created using Ginan at: 2025-06-18 12:51:12.75. 
+/* WARNING: Not for operational use +/* FURTHER MODIFIED for test purposes +/* +* 2025 6 17 6 0 0.00000000 +PG01 12745.713058 -20920.445057 -10235.066434 14284.093701 +PG02 17356.908074 -19748.269066 1534.063156 13821.298806 +PG03 6996.011370 -13196.084315 -22140.265329 14657.057332 +PG04 -31089.146990 -27702.475462 15246.402904 8114.559984 +PG05 -19830.525605 7089.779814 16033.306442 13750.396677 +PG06 -16988.173766 -1949.602010 -20295.348670 13551.688732 +PG07 -2270.179246 -18040.766586 19792.234454 13925.747073 +PG08-538216.0254931012968.294871-1053208.82032548447864.338317 +PG09 -7083.058359 -25531.577633 -1359.151582 14650.575917 +PG10 23427.617554 10853.830687 7170.885439 13506.832573 +PG11 -21694.430081 8483.359745 -12659.392818 13271.029324 +PG12 -8774.447042 13094.193678 -21658.179358 13374.185875 +PG13 -13589.800116 7419.079867 21298.868393 14664.375523 +PG14 -21787.389970 -12589.371555 8982.803742 14624.735662 +PG15 -5984.439275 18021.812909 17992.304093 14271.520577 +PG16 23496.050192 -1131.469812 12387.722593 14915.487393 +PG17 -12641.965416 -20022.638173 -11406.922699 14218.640526 +PG18 3039.944975 16627.705484 20454.126785 13270.449953 +PG19 -15178.759658 -12279.849038 -18338.275906 14602.075331 +PG20 -25646.149370 2776.924136 6178.498750 14327.164082 +PG22 -24135.662639 -10404.533480 2756.681055 13861.093070 +PG23 12070.236616 15157.388506 18324.851816 14459.772237 +PG24 -13603.507447 22282.686090 -4032.083463 13584.378641 +PG25 4171.607493 16340.834530 -20932.463301 14447.073177 +PG27 14473.425293 -4477.963813 21484.052001 13826.995841 +PG28 15633.269690 4582.860945 -20980.507177 13339.472208 +PG29 3739.802769 26108.387963 -2416.969480 13470.028372 +PG30 -12369.980492 -9971.157905 21471.523822 13775.389189 +PG31 20837.949084 -4334.167565 -16353.885830 13760.712681 +PG32 15154.838093 16514.544274 -14152.651538 13544.100657 +* 2025 6 17 6 5 0.00000000 +PG01 14702.489814 -4489.551898 21654.412850 16700.555875 P P +PG02 16708.324133 6959.822686 20022.990974 16173.393625 P P +PG03 21866.763248 -9832.482878 11115.587576 17001.932807 P P +PG04-254631.818980 264705.806327 291334.154224 9437.987720 P P +PG05 -17459.071784 -1731.192351 -20138.703236 15732.218589 P P +PG06 -5247.775383 -25963.469495 -106.156584 15892.813576 P P +PG07-1245784.756055 252424.937619-521507.7748633049872.304950 P +PG08-1673371814.319093-548544075.309430-755457965.280842234820036.542227 +PG09 15025.560401 -9919.201269 -19640.474825 17065.384929 P P +PG10 -5113.384377 19726.422865 17084.945940 15487.981560 P P +PG11 -11664.773450 -20980.171450 -11401.745220 15372.013571 P P +PG12 -22562.376281 7828.132101 11314.783174 15355.656551 P P +PG13 -22359.653873 -14034.669487 -4237.766186 16646.332224 P P +PG14 7447.454218 -20754.053024 14678.463637 17039.487097 P P +PG15 -26387.472895 -2077.721255 2301.854662 16253.047110 P P +PG16 -58745.102181 -82649.138844 -19268.307194 18422.114784 P P +PG17 1364.961422 -15677.196135 21767.090532 16635.027947 P P +PG18 -262.139265 24019.547459 -6772.950905 15324.537271 P P +PG19 -9323.395555 -17517.966539 17764.328229 17012.335175 P P +PG20 -10127.671160 -11661.279769 -21531.147376 16309.157194 P P +PG22 2084.924516 -18627.608417 19304.902715 16201.797552 P P +PG23 -16663.682318 20046.358201 4400.280902 16440.931555 P P +PG24 -16443.409023 -1683.827956 20373.204621 15566.062183 P P +PG25 -19973.822684 17135.243863 1973.770404 16428.373763 P P +PG27-498360.627647-175556.439714-127044.290636 16004.097104 P P +PG28 4562.719415 26010.567999 2765.857306 15320.651995 P P +PG29 
-16981.282694 7447.593834 -19121.287992 15454.465345 P P +PG30 -657.156697 -25249.235486 -7075.002741 20650.418342 P P +PG31 9940.923158 23378.917721 -7363.587468 16067.994743 P P +PG32 4039.189457 16316.323156 20853.738337 15792.719036 P P +EOF +""" + # TODO add some test data that actually has flags!! And write tests for those. diff --git a/tests/test_filenames.py b/tests/test_filenames.py index 19e8cec..f31c6c7 100644 --- a/tests/test_filenames.py +++ b/tests/test_filenames.py @@ -152,7 +152,7 @@ def test_determine_properties_from_filename(self): } derived_props_ltp = filenames.determine_properties_from_filename( - long_term_product, include_compressed_flag=True + long_term_product, reject_long_term_products=False, include_compressed_flag=True ) self.assertEqual(derived_props_ltp, expected_props_ltp) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 13f2b27..a381041 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -33,6 +33,7 @@ sp3_test_data_short_cod_final_header, # For validating header output # For testing comment validation (overlong comment with nothing but extra SPACES in it) sp3_test_data_short_cod_final_overlong_comment_line as sp3_with_overlong_comment, + sp3_test_data_misaligned_columns, ) @@ -91,33 +92,28 @@ def test_check_sp3_version(self): with self.assertRaises(ValueError): sp3.check_sp3_version(fake_header_version_c, strict_mode=STRICT_RAISE) - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_read_sp3_pOnly(self, mock_file): - result = sp3.read_sp3("mock_path", pOnly=True) + def test_read_sp3_pOnly(self): + result = sp3.read_sp3(input_data, pOnly=True, strict_mode=STRICT_OFF) self.assertEqual(len(result), 6) - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_read_sp3_pv(self, mock_file): - result = sp3.read_sp3("mock_path", pOnly=False) + def test_read_sp3_pv(self): + result = sp3.read_sp3(input_data, pOnly=False, strict_mode=STRICT_OFF) self.assertEqual(len(result), 6) # Ensure first epoch is correct / not skipped by incorrect detection of data start. # Check output of both header and data section. self.assertEqual(result.attrs["HEADER"]["HEAD"]["DATETIME"], "2007 4 12 0 0 0.00000000") self.assertEqual(result.index[0][0], 229608000) # Same date, as J2000 - @patch("builtins.open", new_callable=mock_open, read_data=sp3c_example2_data) - def test_read_sp3_pv_with_ev_ep_rows(self, mock_file): - # Expect exception relating to the EV and EP rows, as we can't currently handle them properly. - self.assertRaises( - NotImplementedError, sp3.read_sp3, "mock_path", pOnly=False, continue_on_ep_ev_encountered=False - ) + def test_read_sp3_pv_with_ev_ep_rows(self): + # Expect exception relating to the EV and EP rows (in RAISE mode), as we can't currently handle them properly. 
+ with self.assertRaises(NotImplementedError) as raised_exception: + sp3.read_sp3(sp3c_example2_data, pOnly=False, strict_mode=STRICT_RAISE, skip_version_check=True) - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_read_sp3_header_svs_basic(self, mock_file): + def test_read_sp3_header_svs_basic(self): """ Minimal test of reading SVs from header """ - result = sp3.read_sp3("mock_path", pOnly=False) + result = sp3.read_sp3(input_data, pOnly=False, strict_mode=STRICT_OFF) self.assertEqual(result.attrs["HEADER"]["SV_INFO"].shape[0], 2, "Should be two SVs in data") self.assertEqual(result.attrs["HEADER"]["SV_INFO"].index[1], "G02", "Second SV should be G02") self.assertEqual(result.attrs["HEADER"]["SV_INFO"].iloc[1], 8, "Second ACC should be 8") @@ -168,13 +164,12 @@ def test_read_sp3_header_svs_detailed(self): end_line2_acc = sv_info.iloc[29] self.assertEqual(end_line2_acc, 18, msg="Last ACC on test line 2 (pos 30) should be 18") - @patch("builtins.open", new_callable=mock_open, read_data=sp3_test_data_cod_broken_missing_sv_in_content) - def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file): + def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self): with self.assertRaises(ValueError) as context_manager: - result = sp3.read_sp3( - "COD0OPSFIN_20242010000_10M_05M_ORB.SP3", + sp3.read_sp3( + sp3_test_data_cod_broken_missing_sv_in_content, pOnly=False, - check_header_vs_filename_vs_content_discrepancies=True, # Actually enable the checks for this one + strict_mode=STRICT_RAISE, ) self.assertEqual( str(context_manager.exception), # What did the exception message say? @@ -182,14 +177,13 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file "Loading SP3 with mismatch between SV count in header and in content, should raise exception", ) - @patch("builtins.open", new_callable=mock_open, read_data=sp3c_example2_data) - def test_read_sp3_correct_svs_read_when_ev_ep_present(self, mock_file): + def test_read_sp3_correct_svs_read_when_ev_ep_present(self): # This should not raise an exception; SV count should match header if parsed correctly. result = sp3.read_sp3( - "testfile.SP3", + sp3c_example2_data, pOnly=False, - check_header_vs_filename_vs_content_discrepancies=True, # Actually enable the checks for this one - skip_filename_in_discrepancy_check=True, + strict_mode=STRICT_OFF, # Don't crash (or be noisy) in response to the EV/EP rows. We just want to ensure the other data is read ok. + skip_version_check=True, ) parsed_svs_content = sp3.get_unique_svs(result).astype(str).values self.assertEqual(set(parsed_svs_content), set(["G01", "G02", "G03", "G04", "G05"])) @@ -213,10 +207,67 @@ def test_read_sp3_overlong_lines(self): # sp3.read_sp3(test_content_no_overlong) with self.assertRaises(ValueError) as read_exception: - sp3.read_sp3(test_content_no_overlong) - self.assertEqual( - read_exception.msg, "2 SP3 epoch data lines were overlong and very likely to parse incorrectly." + sp3.read_sp3(test_content_no_overlong, strictness_comments=STRICT_OFF, strict_mode=STRICT_RAISE) + self.assertEqual( + str(read_exception.exception), "2 SP3 epoch data lines were overlong and very likely to parse incorrectly." 
+        )
+
+        # # Assert that it still warns by default (NOTE: we can't test this with above example data, as it doesn't
+        # # contain a full header)
+        # with self.assertWarns(Warning) as read_warning:
+        #     sp3.read_sp3(test_content_no_overlong, strictness_comments=STRICT_OFF)
+        # self.assertEqual(
+        #     str(read_warning.msg), "2 SP3 epoch data lines were overlong and very likely to parse incorrectly."
+        # )
+
+    def test_read_sp3_misalignment_check(self):
+        """
+        Test that misaligned columns raise an error (currently only in STRICT mode).
+        Strictness of comment checking is set to OFF, as the test data has a comment line equal to '/*' not '/* '
+        """
+        with self.assertRaises(ValueError) as read_exception:
+            sp3.read_sp3(sp3_test_data_misaligned_columns, strict_mode=STRICT_RAISE, strictness_comments=STRICT_OFF)
+        self.assertEqual(
+            "Misaligned data line (unused column did not contain a space): 'PG08-538216.0254931012968.294871-1053208.82032548447864.338317 '",
+            str(read_exception.exception),
+        )
+
+    def test_sp3_block_column_check_standalone(self):
+        """
+        Test that misaligned columns in an epoch block raise an error (currently only in STRICT mode)
+        """
+        # Check that a misaligned (but artificially not overlong) data line raises an exception
+        with self.assertRaises(ValueError) as misaligned_ex:
+            data = """
+PG06 -16988.173766 -1949.602010 -20295.348670 13551.688732
+PG07 -2270.179246 -18040.766586 19792.234454 13925.747073
+PG08-538216.0254931012968.294871-1053208.82032548447864.338317
+PG09 -7083.058359 -25531.577633 -1359.151582 14650.575917
+"""
+            sp3._check_column_alignment_of_sp3_block("* 2025 6 17 6 0 0.00000000", data, strict_mode=STRICT_RAISE)
+        self.assertEqual(
+            "Misaligned data line (unused column did not contain a space): 'PG08-538216.0254931012968.294871-1053208.82032548447864.338317 '",
+            str(misaligned_ex.exception),
+        )
+
+        # Check that a misaligned data line (flags), trimmed to 80 chars, raises an exception
+        with self.assertRaises(ValueError) as misaligned_flags:
+            data = """
+PG06 -5247.775383 -25963.469495 -106.156584 15892.813576 P P
+PG07-1245784.756055 252424.937619-521507.7748633049872.304950 P
+"""
+            sp3._check_column_alignment_of_sp3_block("* 2025 6 17 6 0 0.00000000", data, strict_mode=STRICT_RAISE)
+        self.assertEqual(
+            "Misaligned data line (unused column did not contain a space): 'PG07-1245784.756055 252424.937619-521507.7748633049872.304950 P '",
+            str(misaligned_flags.exception),
+        )
+
+        # Check that a misaligned date raises an exception
+        with self.assertRaises(ValueError) as date_ex:
+            sp3._check_column_alignment_of_sp3_block(
+                date=" 2025 6 17 6 0 0.00000000", data="", strict_mode=STRICT_RAISE
             )
+        self.assertIn("Epoch header should be 31 chars long, but was 29", str(date_ex.exception))

     @staticmethod
     def get_example_dataframe(template_name: str = "normal", include_simple_header: bool = True) -> pd.DataFrame:
@@ -303,7 +354,7 @@ def get_example_dataframe(template_name: str = "normal", include_simple_header:
         multi_index = pd.MultiIndex.from_product(index_elements, names=index_names)

         # Compose it all into a DataFrame
-        df = pd.DataFrame(frame_data, index=multi_index, columns=frame_columns)
+        df = pd.DataFrame(frame_data, index=multi_index, columns=frame_columns).sort_index()

         if include_simple_header:
             # Build SV table
@@ -448,6 +499,7 @@ def test_gen_sp3_fundamentals(self):
                 test_content_lines[i],
                 f"Content line {i} didn't match",
             )
+
     # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header()
     # These tests should include:
     # - Correct
alignment of POS, CLK, STDPOS STDCLK, (not velocity yet), FLAGS @@ -465,7 +517,7 @@ def test_get_sp3_comments(self): "/* SP3 FILE GENERATED BY NAPEOS BAHN TOOL (DETERMINATION)", "/* PCV:IGS14_2022 OL/AL:EOT11A NONE YN ORB:CoN CLK:CoN", ] - sp3_df: pd.DataFrame = sp3.read_sp3(input_data) + sp3_df: pd.DataFrame = sp3.read_sp3(input_data, strict_mode=STRICT_OFF) self.assertEqual(sp3.get_sp3_comments(sp3_df), expected_comments, "SP3 comments read should match expectation") self.assertEqual(sp3_df.attrs["COMMENTS"], expected_comments, "Manual read of SP3 comments should match") @@ -478,7 +530,7 @@ def test_update_sp3_comments(self): "/* PCV:IGS14_2022 OL/AL:EOT11A NONE YN ORB:CoN CLK:CoN", ] # Initialise and check state - sp3_df: pd.DataFrame = sp3.read_sp3(input_data) # Load DataFrame + sp3_df: pd.DataFrame = sp3.read_sp3(input_data, strict_mode=STRICT_OFF) # Load DataFrame # Read comments directly from DataFrame to check they are as expected self.assertEqual(sp3_df.attrs["COMMENTS"], expected_comments, "SP3 initial comments read were not as expected") @@ -843,7 +895,7 @@ def test_sp3_comment_append_and_overwrite(self): ] # Load, check initial state - sp3_df = sp3.read_sp3(input_data) + sp3_df = sp3.read_sp3(input_data, strict_mode=STRICT_OFF) initial_commments = sp3.get_sp3_comments(sp3_df) self.assertEqual(expected_initial_comments, initial_commments, "Initial SP3 comments were not as expected") @@ -865,7 +917,7 @@ def test_sp3_comment_append_and_overwrite(self): self.assertEqual(expected_append_comments, appended_comments, "Comments were not as expected after appending") ### Overwrite/replace test ### - sp3_df = sp3.read_sp3(input_data) + sp3_df = sp3.read_sp3(input_data, strict_mode=STRICT_OFF) sp3.update_sp3_comments(sp3_df, comment_lines=new_lines, comment_string=new_freeform_string, ammend=False) expected_replaced_comments = [] @@ -925,14 +977,13 @@ def test_sp3_pos_nodata_to_nan(self): ) self.assertTrue(sp3_df.equals(expected_result)) - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_velinterpolation(self, mock_file): + def test_velinterpolation(self): """ Checking if the velocity interpolation works, right now there is no data to validate, the only thing done is to check if the function runs without errors TODO: update that to check actual expected values """ - result = sp3.read_sp3("mock_path", pOnly=True) + result = sp3.read_sp3(input_data, pOnly=True, strict_mode=STRICT_OFF) r = sp3.getVelSpline(result) r2 = sp3.getVelPoly(result, 2) self.assertIsNotNone(r) @@ -971,9 +1022,8 @@ def test_sp3_offline_sat_removal_standalone(self): "Should be two SVs after removing offline ones", ) - @patch("builtins.open", new_callable=mock_open, read_data=offline_sat_test_data) - def test_sp3_offline_sat_removal(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=False) + def test_sp3_offline_sat_removal(self): + sp3_df = sp3.read_sp3(offline_sat_test_data, pOnly=False, strict_mode=STRICT_OFF) # Confirm starting state of content self.assertEqual( @@ -1013,9 +1063,8 @@ def test_sp3_offline_sat_removal(self, mock_file): ) # sp3_test_data_truncated_cod_final is input_data2 - @patch("builtins.open", new_callable=mock_open, read_data=input_data2) - def test_filter_by_svs(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=False) + def test_filter_by_svs(self): + sp3_df = sp3.read_sp3(input_data2, pOnly=False) self.assertEqual( len(sp3_df.index.get_level_values(1).unique().array), 34, @@ -1043,9 +1092,8 @@ def test_filter_by_svs(self, mock_file): "Should 
have only specific sats after filtering by name", ) - @patch("builtins.open", new_callable=mock_open, read_data=offline_sat_test_data) - def test_trim_df(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=False) + def test_trim_df(self): + sp3_df = sp3.read_sp3(offline_sat_test_data, pOnly=False, strict_mode=STRICT_OFF) # offline_sat_test_data is based on the following file, but 3 epochs, not 2 days: filename = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3" # Expected starting set of epochs, in j2000 seconds @@ -1119,24 +1167,20 @@ def test_trim_df(self, mock_file): class TestSP3Utils(TestCase): - - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_get_unique_svs(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=True) + def test_get_unique_svs(self): + sp3_df = sp3.read_sp3(input_data, pOnly=True, strict_mode=STRICT_OFF) unique_svs = set(sp3.get_unique_svs(sp3_df).values) self.assertEqual(unique_svs, set(["G01", "G02"])) - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_get_unique_epochs(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=True) + def test_get_unique_epochs(self): + sp3_df = sp3.read_sp3(input_data, pOnly=True, strict_mode=STRICT_OFF) unique_epochs = set(sp3.get_unique_epochs(sp3_df).values) self.assertEqual(unique_epochs, set([229608000, 229608900, 229609800])) - @patch("builtins.open", new_callable=mock_open, read_data=sp3c_example2_data) - def test_remove_svs_from_header(self, mock_file): - sp3_df = sp3.read_sp3("mock_path", pOnly=True) + def test_remove_svs_from_header(self): + sp3_df = sp3.read_sp3(sp3c_example2_data, pOnly=True, strict_mode=STRICT_OFF) self.assertEqual(sp3_df.attrs["HEADER"].HEAD.SV_COUNT_STATED, "5", "Header should have 5 SVs to start with") self.assertEqual( set(sp3_df.attrs["HEADER"].SV_INFO.index.values), @@ -1179,7 +1223,9 @@ def test_sp3merge(self): self.fs.create_file(file_paths[1], contents=input_data2) # Call the function to test - result = sp3.sp3merge(sp3paths=file_paths) + # Strict mode off to make output quieter. We check the output, and don't want to do format validation + # testing here. + result = sp3.sp3merge(sp3paths=file_paths, strict_mode=STRICT_OFF) # Test that epochs, satellite, attrs data is as expected: epoch_index = result.index.get_level_values("J2000")
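
The epoch count checks exercised by these tests (via check_epoch_counts_for_discrepancies) derive the expected count by dividing the filename-derived timespan by the sampling rate, and tolerate content that is one epoch short. A self-contained sketch of that arithmetic, with illustrative values for a 1-day product sampled every 5 minutes:

    from datetime import timedelta

    timespan = timedelta(days=1)          # e.g. parsed from an '01D' filename field
    sampling_rate = timedelta(minutes=5)  # e.g. parsed from an '05M' filename field
    filename_derived_epoch_count = int(timespan.total_seconds() / sampling_rate.total_seconds())  # 288

    # A file ending at 23:55 rather than 24:00 legitimately holds one epoch fewer,
    # so a header count of 288 or 287 both pass the check:
    for header_epoch_count in (288, 287):
        assert header_epoch_count in (filename_derived_epoch_count, filename_derived_epoch_count - 1)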