From 02a3f340a4dcfa0964b58ed2ea4d9a688eaa6d97 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Mon, 9 Feb 2026 23:10:48 +0530 Subject: [PATCH 1/8] Migrate network clients to new unified Scraper format (Fix #113) --- changelog/113.bugfix.rst | 1 + radiospectra/net/sources/ecallisto.py | 17 +++++++---------- radiospectra/net/sources/ilofar.py | 13 +++++++------ radiospectra/net/sources/psp.py | 13 ++++++------- radiospectra/net/sources/rstn.py | 14 ++++++++------ 5 files changed, 29 insertions(+), 29 deletions(-) create mode 100644 changelog/113.bugfix.rst diff --git a/changelog/113.bugfix.rst b/changelog/113.bugfix.rst new file mode 100644 index 0000000..05e2a21 --- /dev/null +++ b/changelog/113.bugfix.rst @@ -0,0 +1 @@ +Migrated all network clients to the new unified Scraper format introduced in ``sunpy`` 6.1, resolving compatibility issues with ``sunpy`` 7.1.0. diff --git a/radiospectra/net/sources/ecallisto.py b/radiospectra/net/sources/ecallisto.py index 5d1035b..11c6265 100644 --- a/radiospectra/net/sources/ecallisto.py +++ b/radiospectra/net/sources/ecallisto.py @@ -39,26 +39,23 @@ class eCALLISTOClient(GenericClient): """ - baseurl = ( - r"http://soleil80.cs.technik.fhnw.ch/solarradio/data/2002-20yy_Callisto/" - r"%Y/%m/%d/{obs}_%Y%m%d_%H%M%S.*.fit.gz" - ) pattern = ( - r"{}/2002-20yy_Callisto/{year:4d}/{month:2d}/{day:2d}/" - r"{Observatory}_{year:4d}{month:2d}{day:2d}" - r"_{hour:2d}{minute:2d}{second:2d}{suffix}.fit.gz" + r"http://soleil80.cs.technik.fhnw.ch/solarradio/data/2002-20yy_Callisto/" + r"{{year:4d}}/{{month:2d}}/{{day:2d}}/{obs}_{{year:4d}}{{month:2d}}{{day:2d}}" + r"_{{hour:2d}}{{minute:2d}}{{second:2d}}{{suffix}}.fit.gz" ) @classmethod def pre_search_hook(cls, *args, **kwargs): baseurl, pattern, matchdict = super().pre_search_hook(*args, **kwargs) - obs = matchdict.pop("Observatory") + obs = matchdict["Observatory"] if obs[0] == "*": - baseurl = baseurl.format(obs=r".*") + pattern = pattern.replace("{obs}", "{{Observatory}}") + matchdict.pop("Observatory") else: # Need case sensitive so have to override obs_attr = [a for a in args if isinstance(a, Observatory)][0] - baseurl = baseurl.format(obs=obs_attr.value) + pattern = pattern.replace("{obs}", obs_attr.value) return baseurl, pattern, matchdict def post_search_hook(self, exdict, matchdict): diff --git a/radiospectra/net/sources/ilofar.py b/radiospectra/net/sources/ilofar.py index 23a4580..9a07818 100644 --- a/radiospectra/net/sources/ilofar.py +++ b/radiospectra/net/sources/ilofar.py @@ -48,9 +48,11 @@ class ILOFARMode357Client(GenericClient): """ - baseurl = r"https://data.lofar.ie/%Y/%m/%d/bst/kbt/{dataset}/" r"%Y%m%d_\d{{6}}_bst_00\S{{1}}.dat" - - pattern = r"{}/{year:4d}{month:2d}{day:2d}_{hour:2d}{minute:2d}{second:2d}" r"_bst_00{Polarisation}.dat" + pattern = ( + r"https://data.lofar.ie/{{year:4d}}/{{month:2d}}/{{day:2d}}/bst/kbt/{dataset}/" + r"{{year:4d}}{{month:2d}}{{day:2d}}_{{hour:2d}}{{minute:2d}}{{second:2d}}" + r"_bst_00{{Polarisation}}.dat" + ) @classmethod def _check_wavelengths(cls, wavelength): @@ -94,9 +96,8 @@ def search(self, *args, **kwargs): tr = TimeRange(matchdict["Start Time"], matchdict["End Time"]) for dataset in DATASET_NAMES: - url = self.baseurl.format(dataset=dataset) - scraper = Scraper(url, regex=True) - filesmeta = scraper._extract_files_meta(tr, extractor=self.pattern) + scraper = Scraper(self.pattern.replace("{dataset}", dataset)) + filesmeta = scraper._extract_files_meta(tr) for i in filesmeta: rowdict = self.post_search_hook(i, matchdict) metalist.append(rowdict) diff --git a/radiospectra/net/sources/psp.py b/radiospectra/net/sources/psp.py index 5c6c054..e413e61 100644 --- a/radiospectra/net/sources/psp.py +++ b/radiospectra/net/sources/psp.py @@ -46,11 +46,10 @@ class RFSClient(GenericClient): """ - baseurl = ( - r"https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/{Wavelength}/" - r"{year}/psp_fld_l2_(\w){{7}}_(\d){{8}}_v(\d){{2}}.cdf" + pattern = ( + r"https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/{receiver}/{year_path}/" + r"psp_fld_l2_{{Wavelength}}_{{year:4d}}{{month:2d}}{{day:2d}}_v{{version:2d}}.cdf" ) - pattern = r"{}/{Wavelength}/{year:4d}/" r"psp_fld_l2_{Wavelength}_{year:4d}{month:2d}{day:2d}_v{:2d}.cdf" @classmethod def _check_wavelengths(cls, wavelength): @@ -111,9 +110,9 @@ def search(self, *args, **kwargs): tr = TimeRange(matchdict["Start Time"], matchdict["End Time"]) for receiver in receivers: for year in range(start_year, end_year + 1): - urlpattern = self.baseurl.format(Wavelength=receiver, year=year) - scraper = Scraper(urlpattern, regex=True) - filesmeta = scraper._extract_files_meta(tr, extractor=self.pattern) + pattern = self.pattern.replace("{receiver}", receiver).replace("{year_path}", str(year)) + scraper = Scraper(pattern) + filesmeta = scraper._extract_files_meta(tr) for i in filesmeta: rowdict = self.post_search_hook(i, matchdict) metalist.append(rowdict) diff --git a/radiospectra/net/sources/rstn.py b/radiospectra/net/sources/rstn.py index ecdbf80..67d17ed 100644 --- a/radiospectra/net/sources/rstn.py +++ b/radiospectra/net/sources/rstn.py @@ -31,11 +31,11 @@ class RSTNClient(GenericClient): """ - baseurl = ( + pattern = ( r"https://www.ngdc.noaa.gov/stp/space-weather/solar-data/" - r"solar-features/solar-radio/rstn-spectral/{obs}/%Y/%m/.*.gz" + r"solar-features/solar-radio/rstn-spectral/{obs}/{{year:4d}}/{{month:2d}}/" + r"{{obs_short:2l}}{{year2:2d}}{{month2:2d}}{{day:2d}}.SRS.gz" ) - pattern = r"{}/rstn-spectral/{obs}/{year:4d}/{month:2d}/" r"{obs_short:2l}{year2:2d}{month2:2d}{day:2d}.SRS.gz" observatory_map = { "Holloman": "holloman", @@ -47,14 +47,16 @@ class RSTNClient(GenericClient): observatory_map = {**observatory_map, **dict(map(reversed, observatory_map.items()))} def search(self, *args, **kwargs): - baseurl, pattern, matchdict = self.pre_search_hook(*args, **kwargs) + _, pattern, matchdict = self.pre_search_hook(*args, **kwargs) metalist = [] for obs in matchdict["Observatory"]: - scraper = Scraper(baseurl.format(obs=self.observatory_map[obs.title()]), regex=True) + obs_path = self.observatory_map[obs.title()] + scraper = Scraper(pattern.replace("{obs}", obs_path)) tr = TimeRange(matchdict["Start Time"], matchdict["End Time"]) - filesmeta = scraper._extract_files_meta(tr, extractor=pattern, matcher=matchdict) + filesmeta = scraper._extract_files_meta(tr, matcher=matchdict) for i in filesmeta: + i["obs"] = obs_path rowdict = self.post_search_hook(i, matchdict) metalist.append(rowdict) From 4d0d109c4d0afe149a961930254acf6feabb5ae1 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Mon, 9 Feb 2026 23:28:39 +0530 Subject: [PATCH 2/8] Rename changelog to match PR #142 --- changelog/{113.bugfix.rst => 142.bugfix.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog/{113.bugfix.rst => 142.bugfix.rst} (100%) diff --git a/changelog/113.bugfix.rst b/changelog/142.bugfix.rst similarity index 100% rename from changelog/113.bugfix.rst rename to changelog/142.bugfix.rst From fb4459e686799c93bf7e6c242fe803e8c932542a Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 00:29:26 +0530 Subject: [PATCH 3/8] Use explicit format keyword in Scraper instantiation for SunPy 7.0 compatibility --- radiospectra/net/sources/ilofar.py | 2 +- radiospectra/net/sources/psp.py | 2 +- radiospectra/net/sources/rstn.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/radiospectra/net/sources/ilofar.py b/radiospectra/net/sources/ilofar.py index 9a07818..724318f 100644 --- a/radiospectra/net/sources/ilofar.py +++ b/radiospectra/net/sources/ilofar.py @@ -96,7 +96,7 @@ def search(self, *args, **kwargs): tr = TimeRange(matchdict["Start Time"], matchdict["End Time"]) for dataset in DATASET_NAMES: - scraper = Scraper(self.pattern.replace("{dataset}", dataset)) + scraper = Scraper(format=self.pattern.replace("{dataset}", dataset)) filesmeta = scraper._extract_files_meta(tr) for i in filesmeta: rowdict = self.post_search_hook(i, matchdict) diff --git a/radiospectra/net/sources/psp.py b/radiospectra/net/sources/psp.py index e413e61..692ec93 100644 --- a/radiospectra/net/sources/psp.py +++ b/radiospectra/net/sources/psp.py @@ -111,7 +111,7 @@ def search(self, *args, **kwargs): for receiver in receivers: for year in range(start_year, end_year + 1): pattern = self.pattern.replace("{receiver}", receiver).replace("{year_path}", str(year)) - scraper = Scraper(pattern) + scraper = Scraper(format=pattern) filesmeta = scraper._extract_files_meta(tr) for i in filesmeta: rowdict = self.post_search_hook(i, matchdict) diff --git a/radiospectra/net/sources/rstn.py b/radiospectra/net/sources/rstn.py index 67d17ed..1f8d9c0 100644 --- a/radiospectra/net/sources/rstn.py +++ b/radiospectra/net/sources/rstn.py @@ -51,7 +51,7 @@ def search(self, *args, **kwargs): metalist = [] for obs in matchdict["Observatory"]: obs_path = self.observatory_map[obs.title()] - scraper = Scraper(pattern.replace("{obs}", obs_path)) + scraper = Scraper(format=pattern.replace("{obs}", obs_path)) tr = TimeRange(matchdict["Start Time"], matchdict["End Time"]) filesmeta = scraper._extract_files_meta(tr, matcher=matchdict) From 738df474ddb72ff613274ada5cdb33932c8ec96f Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 00:37:10 +0530 Subject: [PATCH 4/8] Trigger CI restart From d4955848b53db589d0ccb282ebb9a666177a2e20 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 00:44:51 +0530 Subject: [PATCH 5/8] Trigger CI rerun after scraper migration verification From 67178edd7c17420d2df0cacdf2bdc1b044b1c7a6 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 01:02:41 +0530 Subject: [PATCH 6/8] Update pre-commit config to ignore exploration scripts --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ca8855..a9da88c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,6 +30,7 @@ repos: hooks: - id: codespell args: [ "--write-changes" ] +exclude: "^(check_scraper|reproduce_|test_issue_|prototype_slicing|test_prototype).*\\.py$" ci: autofix_prs: false autoupdate_schedule: "quarterly" From 0142a93a2d1c3fe10986ee92528dd24b8aa27097 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 01:13:01 +0530 Subject: [PATCH 7/8] Retry CI From c5ef68c0b626058a5891ed05eea1929661751a28 Mon Sep 17 00:00:00 2001 From: Ninja-lgtm Date: Tue, 10 Feb 2026 01:25:24 +0530 Subject: [PATCH 8/8] Trigger CI retry for transient GitHub failure