Merge pull request #2217 from bsipocz/ned_add_fileformat_option

bsipocz · web-flow · commit 4c7b6520fb44 · 2021-11-17T12:27:46.000-08:00
Add file_format option for Ned.get_image_list()
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -22,6 +22,12 @@ gaia
 - The bug which caused changing the ``MAIN_GAIA_TABLE`` option to have no
   effect has been fixed [#2153]
 
+ipac.ned
+^^^^^^^^
+
+- Added the ``file_format`` keyword to ``get_image_list`` so that links to
+  non-FITS file formats can be obtained as well. [#2217]
+
 vizier
 ^^^^^^
 
diff --git a/astroquery/ipac/ned/core.py b/astroquery/ipac/ned/core.py
@@ -427,7 +427,7 @@ def get_spectra_async(self, object_name, get_query_payload=False,
                           show_progress=True):
         """
         Serves the same purpose as `~NedClass.get_spectra` but returns
-        file-handlers to the remote files rather than downloading them.
+        file-handlers to the remote fits files rather than downloading them.
 
         Parameters
         ----------
@@ -443,14 +443,15 @@ def get_spectra_async(self, object_name, get_query_payload=False,
 
         """
         image_urls = self.get_image_list(object_name, item='spectra',
-                                         get_query_payload=get_query_payload)
+                                         get_query_payload=get_query_payload,
+                                         file_format='fits')
         if get_query_payload:
             return image_urls
         return [commons.FileContainer(U, encoding='binary',
                                       show_progress=show_progress)
                 for U in image_urls]
 
-    def get_image_list(self, object_name, item='image',
+    def get_image_list(self, object_name, *, item='image', file_format='fits',
                        get_query_payload=False):
         """
         Helper function that returns a list of urls from which to download
@@ -466,6 +467,10 @@ def get_image_list(self, object_name, item='image',
         item : str, optional
             Can be either 'image' or 'spectra'. Defaults to 'image'.
             Required to decide the right URL to query.
+        file_format : str, optional
+            Format of images/spectra to return. Defaults to 'fits'.
+            Other options available: 'author-ascii', 'NED-ascii', 'VO-table'.
+
 
         Returns
         -------
@@ -483,9 +488,9 @@ def get_image_list(self, object_name, item='image',
         url = Ned.SPECTRA_URL if item == 'spectra' else Ned.IMG_DATA_URL
         response = self._request("GET", url=url, params=request_payload,
                                  timeout=Ned.TIMEOUT)
-        return self.extract_image_urls(response.text)
+        return self._extract_image_urls(response.text, file_format=file_format)
 
-    def extract_image_urls(self, html_in):
+    def _extract_image_urls(self, html_in, file_format='fits'):
         """
         Helper function that uses regexps to extract the image urls from the
         given HTML.
@@ -495,10 +500,25 @@ def extract_image_urls(self, html_in):
         html_in : str
             source from which the urls are to be extracted
 
+        file_format : str, optional
+            Format of images/spectra to return. Defaults to 'fits'.
+            Other options available: 'author-ascii', 'NED-ascii', 'VO-table'.
+
         """
         base_url = 'http://ned.ipac.caltech.edu'
+
+        extensions = {'fits': 'fits.gz',
+                      'author-ascii': 'txt',
+                      'NED-ascii': '_NED.txt',
+                      'VO-table': '_votable.xml'}
+
+        names = {'fits': 'FITS',
+                 'author-ascii': 'Author-ASCII',
+                 'NED-ascii': 'NED-ASCII',
+                 'VO-table': 'VOTable'}
+
         pattern = re.compile(
-            r'<a\s+href\s*?="?\s*?(.+?fits.gz)"?\s*?>\s*?(?:Retrieve|FITS)</a>',
+            f'<a\s+href\s*?="?\s*?(.+?{extensions[file_format]})"?\s*?>\s*?(?:Retrieve|{names[file_format]})</a>',
             re.IGNORECASE)
         matched_urls = pattern.findall(html_in)
         url_list = [base_url + img_url for img_url in matched_urls]
diff --git a/astroquery/ipac/ned/tests/test_ned.py b/astroquery/ipac/ned/tests/test_ned.py
@@ -141,7 +141,7 @@ def test_photometry(patch_get):
 
 def test_extract_image_urls():
     html_in = open(data_path(DATA_FILES['extract_urls']), 'r').read()
-    url_list = ned.core.Ned.extract_image_urls(html_in)
+    url_list = ned.core.Ned._extract_image_urls(html_in)
     assert len(url_list) == 5
     for url in url_list:
         assert url.endswith('fits.gz')
diff --git a/astroquery/ipac/ned/tests/test_ned_remote.py b/astroquery/ipac/ned/tests/test_ned_remote.py
@@ -93,3 +93,11 @@ def test_get_object_notes_async(self):
     def test_get_object_notes(self):
         result = ned.core.Ned.get_table('3c 273', table='object_notes')
         assert isinstance(result, Table)
+
+    def test_file_format(self):
+        result_ascii = ned.core.Ned.get_image_list('NGC6060', item='spectra',
+                                                   file_format='NED-ascii')
+        result_fits = ned.core.Ned.get_image_list('NGC6060', item='spectra',
+                                                  file_format='fits')
+        assert len(result_ascii) == 3
+        assert len(result_fits) == 1