feat: refresh F10.7 and ap data handling with partial data support #74
base: main
```diff
@@ -1,7 +1,10 @@
 """Utilities for obtaining input datasets."""
 
+import csv
 import os
 import urllib.request
+import warnings
+from datetime import datetime, timedelta
 from io import BytesIO
 from pathlib import Path
```
@@ -14,6 +17,9 @@ | |||||||||||
| _DATA_FNAME: str = "SW-All.csv" | ||||||||||||
| _F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_DATA_FNAME}" | ||||||||||||
| _F107_AP_PATH: Path = Path(pymsis.__file__).parent / _DATA_FNAME | ||||||||||||
| _PARTIAL_DATA_FNAME: str = "SW-Last5Years.csv" | ||||||||||||
| _PARTIAL_F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_PARTIAL_DATA_FNAME}" | ||||||||||||
| _PARTIAL_F107_AP_PATH: Path = Path(pymsis.__file__).parent / _PARTIAL_DATA_FNAME | ||||||||||||
| _DATA: dict[str, npt.NDArray] | None = None | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
|
|
@@ -48,8 +54,60 @@ def download_f107_ap() -> None: | |||||||||||
| f.write(req.read()) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def _load_f107_ap_data() -> dict[str, npt.NDArray]: | ||||||||||||
| """Load data from disk, if it isn't present go out and download it first.""" | ||||||||||||
| def _refresh_f107_ap_data(last_obs_date: np.datetime64) -> None: | ||||||||||||
| """ | ||||||||||||
| Refresh exising SW_All file after last_obs_date. | ||||||||||||
|
|
||||||||||||
| Parameters | ||||||||||||
| ---------- | ||||||||||||
| last_obs_date : datetime of last observed parameter | ||||||||||||
| """ | ||||||||||||
| warnings.warn( | ||||||||||||
| f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}" | ||||||||||||
| ) | ||||||||||||
| req = urllib.request.urlopen(_PARTIAL_F107_AP_URL) | ||||||||||||
| with _PARTIAL_F107_AP_PATH.open("wb") as f: | ||||||||||||
| f.write(req.read()) | ||||||||||||
|
|
||||||||||||
| # Store all observed dates in existing_data | ||||||||||||
| with open(_F107_AP_PATH) as f: | ||||||||||||
| reader = csv.DictReader(f) | ||||||||||||
| existing_data = [ | ||||||||||||
| row for row in reader if np.datetime64(row["DATE"]) <= last_obs_date | ||||||||||||
| ] | ||||||||||||
|
|
||||||||||||
| # Store data new dates in updated_data | ||||||||||||
| updated_data = [] | ||||||||||||
| if os.path.exists(_PARTIAL_F107_AP_PATH): | ||||||||||||
| with open(_PARTIAL_F107_AP_PATH) as f: | ||||||||||||
| reader = csv.DictReader(f) | ||||||||||||
| updated_data = [row for row in reader] | ||||||||||||
|
|
||||||||||||
| # Merge existing_data and new_data into all_data | ||||||||||||
| all_data = {row["DATE"]: row for row in existing_data} | ||||||||||||
| all_data.update({row["DATE"]: row for row in updated_data}) | ||||||||||||
|
|
||||||||||||
| # Sort all_data by date | ||||||||||||
| sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x["DATE"])) | ||||||||||||
| with open(_F107_AP_PATH, "w", newline="") as f: | ||||||||||||
| writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys()) | ||||||||||||
| writer.writeheader() | ||||||||||||
| writer.writerows(sorted_data) | ||||||||||||
|
|
||||||||||||
| os.remove(_PARTIAL_F107_AP_PATH) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]: | ||||||||||||
| """ | ||||||||||||
| Load data from disk. | ||||||||||||
|
|
||||||||||||
| - If it isn't present go out and download it first. | ||||||||||||
| - If present but not up to date, go out and refresh it. | ||||||||||||
|
|
||||||||||||
| Parameters | ||||||||||||
| ---------- | ||||||||||||
| end_date : datetime of last epoch | ||||||||||||
| """ | ||||||||||||
| if not _F107_AP_PATH.exists(): | ||||||||||||
| download_f107_ap() | ||||||||||||
|
|
||||||||||||
|
|
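The merge in `_refresh_f107_ap_data` above deduplicates rows by their `DATE` key, so freshly downloaded rows replace any overlapping dates from the existing file before everything is re-sorted and written back. A minimal standalone sketch of that pattern (the rows and column values here are made up, not the real SW-All.csv schema):

```python
# Toy rows keyed by date; the refreshed list overlaps the existing one by a day.
existing = [
    {"DATE": "2024-01-01", "F10.7_OBS": "140.1"},
    {"DATE": "2024-01-02", "F10.7_OBS": "141.0"},
]
updated = [
    {"DATE": "2024-01-02", "F10.7_OBS": "141.5"},  # overlapping date: newer row wins
    {"DATE": "2024-01-03", "F10.7_OBS": "142.2"},
]

merged = {row["DATE"]: row for row in existing}
merged.update({row["DATE"]: row for row in updated})

# ISO-8601 date strings sort correctly even as plain strings
rows = sorted(merged.values(), key=lambda r: r["DATE"])
```

Because `dict.update` overwrites on key collision, the newly downloaded data always wins for dates present in both files.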
@@ -92,19 +150,34 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]: | |||||||||||
| # so we can't just go back in line lengths) | ||||||||||||
| with _F107_AP_PATH.open() as fin: | ||||||||||||
| with BytesIO() as fout: | ||||||||||||
| last_obs_date: np.datetime64 | None = None | ||||||||||||
| for line in fin: | ||||||||||||
| if "PRM" in line: | ||||||||||||
| # We don't want the monthly predicted values | ||||||||||||
| continue | ||||||||||||
| if ",,,,,,,," in line: | ||||||||||||
| # We don't want lines with missing values | ||||||||||||
| if "PRM" in line or ",,,,,,,," in line: | ||||||||||||
| # We don't want the monthly predicted values or missing values | ||||||||||||
| continue | ||||||||||||
| if ",OBS," in line: | ||||||||||||
| # Capture last observed date | ||||||||||||
| last_obs_date = np.datetime64(line.split(",")[0]) | ||||||||||||
|
||||||||||||
| fout.write(line.encode("utf-8")) | ||||||||||||
| fout.seek(0) | ||||||||||||
| arr = np.loadtxt( | ||||||||||||
| fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1 | ||||||||||||
| ) # type: ignore | ||||||||||||
|
|
||||||||||||
| # Check if the file needs to be refreshed after parsing | ||||||||||||
| if last_obs_date is not None: | ||||||||||||
| file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH)) | ||||||||||||
|
||||||||||||
| if last_obs_date < end_date and datetime.now() - file_mod_time >= timedelta( | ||||||||||||
| hours=3 | ||||||||||||
| ): | ||||||||||||
| # Refresh file if: | ||||||||||||
| # - requested date is beyond the end of current file | ||||||||||||
| # - file hasn't been refresh in the last 3 hours | ||||||||||||
| _refresh_f107_ap_data(last_obs_date) | ||||||||||||
|
|
||||||||||||
| # Re-parse the file after refresh | ||||||||||||
| return _load_f107_ap_data(end_date) | ||||||||||||
|
|
||||||||||||
| # transform each day's 8 3-hourly ap values into a single column | ||||||||||||
| ap = np.empty(len(arr) * 8, dtype=float) | ||||||||||||
| daily_ap = arr["Ap"].astype(float) | ||||||||||||
|
|
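The refresh guard above fires only when both conditions hold: the requested epoch extends past the file's last observed date, and the file's modification time is at least three hours old. The second condition amounts to a small mtime freshness test; a hedged sketch of just that piece (the helper name is ours, not part of the PR):

```python
import os
from datetime import datetime, timedelta


def _is_stale(path: str, max_age: timedelta = timedelta(hours=3)) -> bool:
    """Return True when the file at ``path`` is older than ``max_age``."""
    mod_time = datetime.fromtimestamp(os.path.getmtime(path))
    return datetime.now() - mod_time >= max_age
```

The three-hour window throttles repeated re-downloads when users keep requesting dates beyond the available data.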
@@ -207,7 +280,7 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA | |||||||||||
| | prior to current time | ||||||||||||
| """ | ||||||||||||
| dates = np.asarray(dates, dtype=np.datetime64) | ||||||||||||
| data = _DATA or _load_f107_ap_data() | ||||||||||||
| data = _DATA or _load_f107_ap_data(dates[-1]) | ||||||||||||
|
||||||||||||
Suggested change:

```diff
-    data = _DATA or _load_f107_ap_data(dates[-1])
+    # Load/download data right away.
+    data = _DATA or _load_f107_ap_data()
+    if dates[-1] > data["dates"][-1]:
+        data = _new_function_to_download_recent_data()
```
Be wary of this because if there is any download error above, this file will be left hanging around.
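One way to avoid leaving a partial file behind (a sketch under our own assumptions; this helper and the temp-file strategy are not part of the PR) is to download into a temporary file and only move it into place after the download succeeds:

```python
import os
import tempfile
import urllib.request


def _download_atomically(url: str, dest: str) -> None:
    # Create the temp file in the destination directory so os.replace()
    # stays on one filesystem and the final move is atomic.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(dest) or ".")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(urllib.request.urlopen(url).read())
        os.replace(tmp_path, dest)
    except BaseException:
        os.remove(tmp_path)  # drop the partial download on any failure
        raise
```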
A few quick thoughts:
- Should we store the `_DATA` arrays with `np.savez()` instead of the csv files for easier manipulation? (Then you wouldn't need to mess with the DictReader/DictWriter.)
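A rough sketch of the `np.savez()` idea, assuming `_DATA` maps string keys to plain numpy arrays (the cache filename here is made up):

```python
import numpy as np


def save_cache(data: dict[str, np.ndarray], path: str = "sw_data.npz") -> None:
    # Each dict key becomes a named array inside a single .npz archive.
    np.savez(path, **data)


def load_cache(path: str = "sw_data.npz") -> dict[str, np.ndarray]:
    # NpzFile is a context manager; pull every stored array back out by name.
    with np.load(path) as npz:
        return {key: npz[key] for key in npz.files}
```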