diff --git a/CHANGELOG.md b/CHANGELOG.md index a0759c7..74d10d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,23 @@ All notable changes to this project will be documented in this file. +## [v0.13.0] unreleased + +- **ADDED** `interpolate_indices` option. + - Linearly interpolate all input indices between their native time + resolution (daily for F10.7/F10.7a/daily Ap, 3-hourly for ap indices). + This can help avoid large step-changes in estimates when evaluating + at higher temporal resolutions. There can be step changes of more than + 20% in some situations when going from hour 02:59 to 03:00 when new + indices get used again on the 3-hourly boundaries. +- **CHANGED** `get_f107_ap()` returns arrays of the same shape as the input + - Previously, when a scalar date was passed to the utility function, the + `ap` values were 1d (7,) rather than of shape (1, 7) corresponding to + the date and all ap values. This makes the output consistent for scalar + and array-like inputs. + - This should have minimal impact on users, as it is a helper function + and behavior of the calculation routines is unchanged. + ## [v0.12.0] 2025-11-26 - **MAINTENANCE** Publish Python 3.14 wheels. diff --git a/examples/general_examples/plot_interpolation_comparison.py b/examples/general_examples/plot_interpolation_comparison.py new file mode 100644 index 0000000..215d10a --- /dev/null +++ b/examples/general_examples/plot_interpolation_comparison.py @@ -0,0 +1,107 @@ +""" +Comparison of Step vs Interpolated Geomagnetic Indices +======================================================= + +This example demonstrates the difference between using step-function +(discrete 3-hourly) and linearly interpolated geomagnetic indices +when calculating atmospheric density with MSIS. + +The step-function approach uses constant values within each 3-hour window, +while the interpolated approach smoothly transitions between values. +This can result in noticeable differences in computed density, especially +for high-cadence simulations. + +Note: This example uses storm-time Ap mode (geomagnetic_activity=-1) which +uses the 3-hourly ap values. The default daily Ap mode (geomagnetic_activity=1) +only uses daily average Ap values where interpolation has less effect. +""" + +from datetime import datetime, timedelta + +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np + +import pymsis +from pymsis.utils import get_f107_ap + + +# %% +# Set up the simulation +# --------------------- +# We'll compute density at a single mid-latitude location over 24 hours. + +lat, lon, altitude = 45, -100, 400 # Mid-latitude, LEO altitude +start_date = datetime(2024, 5, 10, 0, 0, 0) + +# 1-minute sampling for 24 hours +dates = np.array( + [np.datetime64(start_date) + np.timedelta64(i, "m") for i in range(1440)] +) + +# %% +# Calculate densities and get indices +# ----------------------------------- + +# Use storm-time mode (geomagnetic_activity=-1) to use 3-hourly ap values +output_step = pymsis.calculate( + dates, lon, lat, altitude, geomagnetic_activity=-1, interpolate_indices=False +) +output_interp = pymsis.calculate( + dates, lon, lat, altitude, geomagnetic_activity=-1, interpolate_indices=True +) + +density_step = output_step[..., pymsis.Variable.MASS_DENSITY].squeeze() +density_interp = output_interp[..., pymsis.Variable.MASS_DENSITY].squeeze() + +# Get the underlying indices +_, _, ap_step = get_f107_ap(dates) +_, _, ap_interp = get_f107_ap(dates, interpolate=True) + +# %% +# Plot the comparison +# ------------------- + +fig, axes = plt.subplots(3, 1, figsize=(10, 8), sharex=True, layout="constrained") +plot_dates = [d.astype("datetime64[ms]").astype(datetime) for d in dates] + +# Panel 1: 3-hourly ap index +ax = axes[0] +ax.plot(plot_dates, ap_step[:, 1], "b-", label="Step", linewidth=1) +ax.plot(plot_dates, ap_interp[:, 1], "r--", label="Interpolated", linewidth=1) +ax.set_ylabel("3-hour ap") +ax.set_title("Geomagnetic Index (ap)") +ax.legend(loc="upper left") +ax.grid(True, alpha=0.3) + +# Panel 2: Mass density +ax = axes[1] +ax.plot(plot_dates, density_step, "b-", label="Step", linewidth=1) +ax.plot(plot_dates, density_interp, "r--", label="Interpolated", linewidth=1) +ax.set_ylabel("Density (kg/m³)") +ax.set_title(f"Mass Density at {altitude} km") +ax.legend(loc="upper left") +ax.grid(True, alpha=0.3) + +# Panel 3: Percent difference +ax = axes[2] +pct_diff = 100 * (density_interp - density_step) / density_step +ax.plot(plot_dates, pct_diff, "g-", linewidth=1) +ax.axhline(0, color="black", linestyle="-", alpha=0.3) +ax.set_ylabel("Difference (%)") +ax.set_xlabel("Time (UTC)") +ax.set_title("Relative Difference (Interpolated - Step) / Step") +ax.grid(True, alpha=0.3) + +# Format x-axis +ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M")) +ax.xaxis.set_major_locator(mdates.HourLocator(interval=3)) +ax.set_xlim(plot_dates[0], plot_dates[-1]) + +# Mark 3-hour boundaries on all panels +for ax in axes: + for hour in range(0, 25, 3): + boundary = start_date + timedelta(hours=hour) + ax.axvline(boundary, color="gray", linestyle=":", alpha=0.5) + +plt.show() diff --git a/pymsis/msis.py b/pymsis/msis.py index abc58bd..1dc707f 100644 --- a/pymsis/msis.py +++ b/pymsis/msis.py @@ -81,6 +81,7 @@ def calculate( *, options: list[float] | None = None, version: float | str = 2.1, + interpolate_indices: bool = False, **kwargs: dict, ) -> npt.NDArray: r""" @@ -140,6 +141,12 @@ def calculate( all keyword arguments specifying individual options will be ignored. version : Number or string, default: 2.1 MSIS version number, one of (0, 2.0, 2.1). + interpolate_indices : bool, default: False + If True, linearly interpolate F10.7, F10.7a, and ap indices between + their native time resolution (daily for F10.7/F10.7a, 3-hourly for ap). + If False (default), use step-function sampling where values change + discretely at boundaries. Linear interpolation can provide smoother + density variations for high-cadence simulations. **kwargs : dict Single options for the switches can be defined through keyword arguments. For example, ``calculate(..., geomagnetic_activity=-1)`` will set the @@ -227,6 +234,12 @@ def calculate( Terdiurnal (8-hour) atmospheric variations. Controls atmospheric tides that occur three times per day due to solar heating harmonics. Setting to 0 removes these 8-hourly atmospheric oscillations. + interpolate_indices : bool, default: False + If True, linearly interpolate F10.7, F10.7a, and ap indices between + their native time resolution (daily for F10.7/F10.7a, 3-hourly for ap). + If False (default), use step-function sampling where values change + discretely at boundaries. Linear interpolation can provide smoother + density variations for high-cadence simulations. Notes ----- @@ -241,7 +254,16 @@ def calculate( elif len(options) != num_options: raise ValueError(f"options needs to be a list of length {num_options}") - input_shape, input_data = create_input(dates, lons, lats, alts, f107s, f107as, aps) + input_shape, input_data = create_input( + dates, + lons, + lats, + alts, + f107s, + f107as, + aps, + interpolate_indices=interpolate_indices, + ) if np.any(~np.isfinite(input_data)): raise ValueError( @@ -422,6 +444,7 @@ def create_input( f107s: npt.ArrayLike | None = None, f107as: npt.ArrayLike | None = None, aps: npt.ArrayLike | None = None, + interpolate_indices: bool = False, ) -> tuple[tuple, npt.NDArray]: """ Combine all input values into a single flattened array. @@ -442,6 +465,9 @@ def create_input( F107 running 81-day average for the given date(s) aps : ArrayLike, optional Ap for the given date(s) + interpolate_indices : bool, default: False + If True, use linear interpolation for F10.7, F10.7a, and ap indices. + If False, use step-function (nearest-neighbor) sampling. Returns ------- @@ -471,7 +497,7 @@ def create_input( # If any of the geomagnetic data wasn't specified, we will default # to getting it with the utility functions. if f107s is None or f107as is None or aps is None: - data = get_f107_ap(dates_arr) + data = get_f107_ap(dates_arr, interpolate=interpolate_indices) # Only update the values that were None if f107s is None: f107s = data[0] diff --git a/pymsis/utils.py b/pymsis/utils.py index ee8cf1b..c7577bd 100644 --- a/pymsis/utils.py +++ b/pymsis/utils.py @@ -172,7 +172,10 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]: return data -def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]: +def get_f107_ap( + dates: npt.ArrayLike, + interpolate: bool = False, +) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]: """ Retrieve the F10.7 and ap data needed to run msis for the given times. @@ -183,6 +186,11 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA ---------- dates : datetime-like or sequence of datetimes times of interest to get the proper ap and F10.7 values for + interpolate : bool, default: False + If True, linearly interpolate all values between their native time + resolution (daily for F10.7/F10.7a/daily Ap, 3-hourly for ap indices). + If False (default), use step-function sampling where values change + discretely at boundaries. Returns ------- @@ -211,7 +219,8 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA data_start = data["dates"][0] data_end = data["dates"][-1] - date_offsets = dates - data_start + date_offsets = np.atleast_1d(dates - data_start) + # Ensure consistent array shapes for both scalar and array inputs # daily index values daily_indices = date_offsets.astype("timedelta64[D]").astype(int) # 3-hourly index values @@ -225,10 +234,39 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA f"{data_end}." ) - f107 = np.take(data["f107"], daily_indices) - f107a = np.take(data["f107a"], daily_indices) - ap = np.take(data["ap"], ap_indices, axis=0) - warn_or_not = np.take(data["warn_data"], daily_indices) + if not interpolate: + # Normal sampling (step function behavior) + f107 = data["f107"][daily_indices] + f107a = data["f107a"][daily_indices] + ap = data["ap"][ap_indices] + else: + # Linear interpolation between time boundaries + fractional_hours = date_offsets / np.timedelta64(1, "h") + daily_frac = (fractional_hours / 24.0) % 1.0 + ap_frac = (fractional_hours % 3) / 3.0 + + def interp( + arr: npt.NDArray, idx: npt.NDArray, frac: npt.NDArray + ) -> npt.NDArray: + """Linear interpolation with automatic bounds clipping.""" + floor_vals = np.take(arr, idx, axis=0, mode="clip") + ceil_vals = np.take(arr, idx + 1, axis=0, mode="clip") + # Expand frac for broadcasting with 2D arrays (ap data) + if arr.ndim > frac.ndim: + frac = frac[:, np.newaxis] + return floor_vals + frac * (ceil_vals - floor_vals) + + # Interpolate daily values (F10.7, F10.7a) + f107 = interp(data["f107"], daily_indices, daily_frac) + f107a = interp(data["f107a"], daily_indices, daily_frac) + + # Interpolate 3-hourly ap values (all columns) + ap = interp(data["ap"], ap_indices, ap_frac) + + # Column 0 needs daily interpolation, use every 8th 3-hourly value + ap[:, 0] = interp(data["ap"][::8, 0], daily_indices, daily_frac) + + warn_or_not = data["warn_data"][daily_indices] # TODO: Do we want to warn if any values within 81 days of a point are used? # i.e. if any of the f107a values were interpolated or predicted if np.any(warn_or_not): diff --git a/tests/test_utils.py b/tests/test_utils.py index 7c13cae..c70d388 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -63,14 +63,14 @@ def test_loading_data(monkeypatch, tmp_path): np.datetime64("2000-01-01T00:00"), [np.nan], [166.2], - [30, 56, np.nan, np.nan, np.nan, np.nan, np.nan], + [[30, 56, np.nan, np.nan, np.nan, np.nan, np.nan]], ), # Middle of the data file, should be fully filled ( np.datetime64("2000-07-01T12:00"), [159.6], [186.3], - [7, 4, 5, 9, 4, 5.25, 5.75], + [[7, 4, 5, 9, 4, 5.25, 5.75]], ), # Requesting two dates should return length two arrays ( @@ -85,7 +85,7 @@ def test_loading_data(monkeypatch, tmp_path): np.datetime64("2000-12-30T12:00"), [173.7], [173.5], - [3, 4, 4, 3, 3, 6.375, 5.375], + [[3, 4, 4, 3, 3, 6.375, 5.375]], ), ], ) @@ -130,3 +130,40 @@ def test_get_f107_ap_interpolated_warns(dates): UserWarning, match="There is data that was either interpolated or" ): utils.get_f107_ap(dates) + + +@pytest.mark.parametrize( + ("date", "expected_f107", "expected_ap_col1"), + [ + # At exact boundary (00:00), interpolated should match step values + ([np.datetime64("2000-07-01T00:00")], 159.6, 9.0), + # At 3-hour boundary: ap should match, f107 interpolates within day + ([np.datetime64("2000-07-01T03:00")], 160.1125, 4.0), + # Mid-way through 3-hour window (01:30): both ap and f107 interpolate + ([np.datetime64("2000-07-01T01:30")], 159.85625, 6.5), + ], +) +def test_get_f107_ap_interpolate(date, expected_f107, expected_ap_col1): + """Test linear interpolation of F10.7 and ap values.""" + f107, _, ap = utils.get_f107_ap(date, interpolate=True) + assert_allclose(f107[0], expected_f107) + assert_allclose(ap[0, 1], expected_ap_col1) + + +@pytest.mark.parametrize( + "date", + [ + np.datetime64("2000-01-01T00:00"), + np.datetime64("2000-07-01T00:00"), + np.datetime64("2000-07-02T00:00"), + ], +) +def test_get_f107_ap_interpolate_exact_match(date): + """Test that interpolation at exact data points returns the same as no interp.""" + f107_nointerp, f107a_nointerp, ap_nointerp = utils.get_f107_ap( + date, interpolate=False + ) + f107_interp, f107a_interp, ap_interp = utils.get_f107_ap(date, interpolate=True) + assert_allclose(f107_nointerp, f107_interp) + assert_allclose(f107a_nointerp, f107a_interp) + assert_array_equal(ap_nointerp, ap_interp)