Skip to content

Commit 77b09ce

Browse files
authored
S3 support in url loader/resolver (#3702)
1 parent 14b162b commit 77b09ce

File tree

5 files changed

+35
-27
lines changed

5 files changed

+35
-27
lines changed

jdaviz/configs/imviz/plugins/parsers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from astropy.nddata import NDData
99
from astropy.wcs import WCS
1010
from astropy.utils.data import cache_contents
11+
from urllib.parse import urlparse
1112

1213
from glue.core.data import Component, Data
1314
from gwcs.wcs import WCS as GWCS
@@ -111,9 +112,9 @@ def parse_data(app, file_obj, ext=None, data_label=None,
111112
transformations.
112113
"""
113114
if isinstance(file_obj, str):
114-
file_obj = get_cloud_fits(
115-
file_obj, ext=ext, cache=cache, local_path=local_path, timeout=timeout)
116-
if not isinstance(file_obj, str):
115+
if urlparse(file_obj).scheme == 's3':
116+
file_obj = get_cloud_fits(
117+
file_obj, ext=ext)
117118
_parse_image(
118119
app, file_obj, data_label, ext=ext, parent=parent,
119120
try_gwcs_to_fits_sip=gwcs_to_fits_sip

jdaviz/core/loaders/resolvers/url/url.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from jdaviz.core.registries import loader_resolver_registry
88
from jdaviz.core.loaders.resolvers import BaseResolver
99
from jdaviz.core.user_api import LoaderUserApi
10-
from jdaviz.utils import download_uri_to_path
10+
from jdaviz.utils import download_uri_to_path, get_cloud_fits
1111

1212

1313
__all__ = ['URLResolver']
@@ -19,6 +19,7 @@ class URLResolver(BaseResolver):
1919
default_input = 'url'
2020

2121
url = Unicode("").tag(sync=True)
22+
url_scheme = Unicode("").tag(sync=True)
2223
cache = Bool(True).tag(sync=True)
2324
local_path = Unicode("").tag(sync=True)
2425
timeout = FloatHandleEmpty(10).tag(sync=True)
@@ -33,7 +34,7 @@ def user_api(self):
3334

3435
@property
3536
def is_valid(self):
36-
return urlparse(self.url.strip()).scheme in ['http', 'https', 'mast', 'ftp']
37+
return self.url_scheme in ['http', 'https', 'mast', 'ftp', 's3']
3738

3839
@property
3940
def default_label(self):
@@ -45,6 +46,8 @@ def default_label(self):
4546

4647
@observe('url', 'cache', 'timeout')
4748
def _on_url_changed(self, change):
49+
self.url_scheme = urlparse(self.url.strip()).scheme
50+
4851
# Clear the cached property to force re-download
4952
# or otherwise read from local file cache.
5053
if '_uri_output_file' in self.__dict__ and change['name'] in ('url', 'cache'):
@@ -54,6 +57,8 @@ def _on_url_changed(self, change):
5457

5558
@cached_property
5659
def _uri_output_file(self):
60+
if self.url_scheme == 's3':
61+
return get_cloud_fits(self.url.strip())
5762
return download_uri_to_path(self.url.strip(), cache=self.cache,
5863
local_path=self.local_path, timeout=self.timeout)
5964

jdaviz/core/loaders/resolvers/url/url.vue

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
></v-text-field>
2424
</v-row>
2525

26-
<v-row>
26+
<v-row v-if="url_scheme !== 's3'">
2727
<v-text-field
2828
v-model.number='timeout'
2929
type="number"
@@ -35,6 +35,7 @@
3535
</v-row>
3636

3737
<plugin-switch
38+
v-if="url_scheme !== 's3'"
3839
:value.sync="cache"
3940
label="Cache File"
4041
api_hint="ldr.cache = "

jdaviz/core/loaders/test_loaders.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ def test_resolver_url(deconfigged_helper):
121121
loader.url = 'not-valid-url'
122122
assert len(loader.format.choices) == 0
123123

124+
# s3 input
125+
loader.url = "s3://stpubdata/jwst/public/jw02727/L3/t/o002/jw02727-o002_t062_nircam_clear-f277w_i2d.fits" # noqa: E501
126+
assert loader._obj.url_scheme == 's3'
127+
assert len(loader.format.choices) > 0
128+
129+
# https valid input
124130
loader.url = 'https://stsci.box.com/shared/static/exnkul627fcuhy5akf2gswytud5tazmw.fits' # noqa
125131
assert len(loader.format.choices) == 4 # may change with future importers
126132
assert loader.format.selected == 'Image' # default may change with future importers

jdaviz/utils.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -558,8 +558,7 @@ def __setgluestate__(cls, rec, context):
558558
return cls(masks=masks)
559559

560560

561-
def get_cloud_fits(possible_uri, ext=None, cache=None, local_path=os.curdir, timeout=None,
562-
dryrun=False):
561+
def get_cloud_fits(possible_uri, ext=None):
563562
"""
564563
Retrieve and open a FITS file from an S3 URI using fsspec. Raise a
565564
`ValueError` if the input is not an S3 URI.
@@ -579,10 +578,6 @@ def get_cloud_fits(possible_uri, ext=None, cache=None, local_path=os.curdir, tim
579578
Extension(s) to load from the FITS file. Can be an integer index (e.g., 0),
580579
a string name (e.g., "SCI"), or a list of such values. If `None`, all extensions
581580
are loaded.
582-
cache : None, bool, or str, optional
583-
local_path : str, optional
584-
timeout : float, optional
585-
dryrun : bool, optional
586581
587582
Returns
588583
-------
@@ -594,24 +589,24 @@ def get_cloud_fits(possible_uri, ext=None, cache=None, local_path=os.curdir, tim
594589
parsed_uri = urlparse(possible_uri)
595590

596591
# TODO: Add caching logic
597-
if parsed_uri.scheme.lower() == 's3':
598-
downloaded_hdus = []
599-
# this loads the requested extensions into local memory:
600-
with fits.open(possible_uri, fsspec_kwargs={"anon": True}) as hdul:
601-
if ext is None:
602-
ext_list = list(range(len(hdul)))
603-
elif not isinstance(ext, list):
604-
ext_list = [ext]
605-
else:
606-
ext_list = ext
607-
for extension in ext_list:
608-
hdu_obj = hdul[extension]
609-
downloaded_hdus.append(hdu_obj.copy())
592+
if not parsed_uri.scheme.lower() == 's3':
593+
raise ValueError("Not an S3 URI: {}".format(possible_uri))
594+
595+
downloaded_hdus = []
596+
# this loads the requested extensions into local memory:
597+
with fits.open(possible_uri, fsspec_kwargs={"anon": True}) as hdul:
598+
if ext is None:
599+
ext_list = list(range(len(hdul)))
600+
elif not isinstance(ext, list):
601+
ext_list = [ext]
602+
else:
603+
ext_list = ext
604+
for extension in ext_list:
605+
hdu_obj = hdul[extension]
606+
downloaded_hdus.append(hdu_obj.copy())
610607

611608
file_obj = fits.HDUList(downloaded_hdus)
612609
return file_obj
613-
# not s3 resource, return string as is
614-
return possible_uri
615610

616611

617612
def cached_uri(uri):

0 commit comments

Comments
 (0)