2 changes: 1 addition & 1 deletion pymsis/__init__.py
@@ -7,4 +7,4 @@

__version__ = importlib.metadata.version("pymsis")

__all__ = ["__version__", "Variable", "calculate"]
__all__ = ["Variable", "__version__", "calculate"]
89 changes: 81 additions & 8 deletions pymsis/utils.py
@@ -1,7 +1,10 @@
"""Utilities for obtaining input datasets."""

import csv
import os
import urllib.request
import warnings
from datetime import datetime, timedelta
from io import BytesIO
from pathlib import Path

@@ -14,6 +17,9 @@
_DATA_FNAME: str = "SW-All.csv"
_F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_DATA_FNAME}"
_F107_AP_PATH: Path = Path(pymsis.__file__).parent / _DATA_FNAME
_PARTIAL_DATA_FNAME: str = "SW-Last5Years.csv"
_PARTIAL_F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_PARTIAL_DATA_FNAME}"
_PARTIAL_F107_AP_PATH: Path = Path(pymsis.__file__).parent / _PARTIAL_DATA_FNAME
_DATA: dict[str, npt.NDArray] | None = None


@@ -48,8 +54,60 @@ def download_f107_ap() -> None:
        f.write(req.read())


-def _load_f107_ap_data() -> dict[str, npt.NDArray]:
-    """Load data from disk, if it isn't present go out and download it first."""
def _refresh_f107_ap_data(last_obs_date: np.datetime64) -> None:
    """
    Refresh the existing SW-All file after last_obs_date.

    Parameters
    ----------
    last_obs_date : np.datetime64
        Date of the last observed parameter.
    """
    warnings.warn(
        f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}"
    )
    req = urllib.request.urlopen(_PARTIAL_F107_AP_URL)
    with _PARTIAL_F107_AP_PATH.open("wb") as f:
        f.write(req.read())

    # Keep rows up to and including last_obs_date in existing_data
    with _F107_AP_PATH.open() as f:
        reader = csv.DictReader(f)
        existing_data = [
            row for row in reader if np.datetime64(row["DATE"]) <= last_obs_date
        ]

    # Store rows with new dates in updated_data
    updated_data = []
    if _PARTIAL_F107_AP_PATH.exists():
        with _PARTIAL_F107_AP_PATH.open() as f:
            reader = csv.DictReader(f)
            updated_data = list(reader)

    # Merge existing_data and updated_data into all_data,
    # preferring the newly downloaded rows on date collisions
    all_data = {row["DATE"]: row for row in existing_data}
    all_data.update({row["DATE"]: row for row in updated_data})

    # Sort all_data by date and rewrite the full file
    sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x["DATE"]))
    with _F107_AP_PATH.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys())
        writer.writeheader()
        writer.writerows(sorted_data)

    os.remove(_PARTIAL_F107_AP_PATH)
Member
Be wary of this because if there is any download error above, this file will be left hanging around.

A few quick thoughts:

  • Do we need to save it to disk, or could you store it in a BytesIO object in memory instead?
  • Would it make sense to save the _DATA arrays with np.savez() instead of the csv files for easier manipulation? (Then you wouldn't need to mess with the DictReader/DictWriter.) A rough sketch of both ideas follows this list.
  • Honestly, now that I'm thinking about it more, is this more headache than it is worth? Maybe we should just use your logic below and re-download the full file for the simplest case. More bandwidth, but at least we are getting the full file built for us rather than assembling it from bits and pieces.
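A rough sketch of the first two bullets, reusing the module constants from this diff; the helper names are hypothetical, not part of the PR:

```python
import urllib.request
from io import BytesIO

import numpy as np


def _download_partial_data_in_memory() -> BytesIO:
    # Hypothetical helper: keep the partial download in memory so a
    # failed request never leaves a half-written file on disk.
    with urllib.request.urlopen(_PARTIAL_F107_AP_URL) as req:
        return BytesIO(req.read())


def _save_data_arrays(data: dict) -> None:
    # Hypothetical helper: persist the parsed arrays directly with
    # np.savez instead of round-tripping through csv.DictReader/DictWriter.
    np.savez(_F107_AP_PATH.with_suffix(".npz"), **data)
```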



def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
    """
    Load data from disk.

    - If it isn't present, download it first.
    - If it is present but not up to date, refresh it.

    Parameters
    ----------
    end_date : np.datetime64
        Date of the last requested epoch.
    """
    if not _F107_AP_PATH.exists():
        download_f107_ap()

@@ -92,19 +150,34 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]:
    # so we can't just go back in line lengths)
    with _F107_AP_PATH.open() as fin:
        with BytesIO() as fout:
+           last_obs_date: np.datetime64 | None = None
            for line in fin:
-               if "PRM" in line:
-                   # We don't want the monthly predicted values
-                   continue
-               if ",,,,,,,," in line:
-                   # We don't want lines with missing values
+               if "PRM" in line or ",,,,,,,," in line:
+                   # We don't want the monthly predicted values or missing values
                    continue
+               if ",OBS," in line:
+                   # Capture last observed date
+                   last_obs_date = np.datetime64(line.split(",")[0])
Member
Can we get this from the _DATA numpy arrays? Maybe warn_data contains this information already and we could do something like (have not tried this) _DATA["dates"][~_DATA["warn_data"]][-1].
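If that flag exists, a minimal sketch of the idea (untested, and assuming _DATA carries a boolean "warn_data" mask marking predicted rows alongside "dates"):

```python
def _last_observed_date(data: dict[str, npt.NDArray]) -> np.datetime64:
    # Last date whose row is not flagged as predicted data.
    return data["dates"][~data["warn_data"]][-1]
```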

                fout.write(line.encode("utf-8"))
            fout.seek(0)
            arr = np.loadtxt(
                fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1
            )  # type: ignore

    # Check if the file needs to be refreshed after parsing
    if last_obs_date is not None:
        file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
Member
Do we need file modification time? I think we should just leave that out and only rely on the requested date being greater than the observations we have.
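Inside _load_f107_ap_data, that simplification might look like the following sketch, which drops the mtime throttle entirely (hypothetical, not what this diff implements):

```python
    # Refresh purely on the date comparison: the requested end_date is
    # beyond the last observation currently on disk.
    if last_obs_date is not None and last_obs_date < end_date:
        _refresh_f107_ap_data(last_obs_date)
        return _load_f107_ap_data(end_date)
```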

        if last_obs_date < end_date and datetime.now() - file_mod_time >= timedelta(
            hours=3
        ):
            # Refresh file if:
            # - requested date is beyond the end of the current file
            # - file hasn't been refreshed in the last 3 hours
            _refresh_f107_ap_data(last_obs_date)

            # Re-parse the file after refresh
            return _load_f107_ap_data(end_date)

    # transform each day's 8 3-hourly ap values into a single column
    ap = np.empty(len(arr) * 8, dtype=float)
    daily_ap = arr["Ap"].astype(float)
@@ -207,7 +280,7 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    | prior to current time
    """
    dates = np.asarray(dates, dtype=np.datetime64)
-   data = _DATA or _load_f107_ap_data()
+   data = _DATA or _load_f107_ap_data(dates[-1])
Member
If we have a long-running process with _DATA already loaded, then this won't update the data for us. I think the logic should probably be a new if block here, after the original loading, instead?

Suggested change
-   data = _DATA or _load_f107_ap_data(dates[-1])
+   # Load/download data right away.
+   data = _DATA or _load_f107_ap_data()
+   if dates[-1] > data["dates"][-1]:
+       data = _new_function_to_download_recent_data()
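One possible shape for the helper named in this suggestion; the name comes from the comment itself, and this is a hypothetical sketch rather than part of the PR:

```python
def _new_function_to_download_recent_data() -> dict[str, npt.NDArray]:
    # Hypothetical: merge the recent partial file into the on-disk data
    # starting from the last date already cached, then reload and
    # refresh the module-level cache.
    global _DATA
    _refresh_f107_ap_data(_DATA["dates"][-1])
    _DATA = _load_f107_ap_data()
    return _DATA
```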


data_start = data["dates"][0]
data_end = data["dates"][-1]