Skip to content

Commit e20ab16

Browse files
committed
Merge pull request #500 from keflavich/alma_api_update
Update to match ALMA upstream API change
2 parents 29c0725 + b67bbd0 commit e20ab16

File tree

10 files changed

+58754
-1266
lines changed

10 files changed

+58754
-1266
lines changed

CHANGES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
- Bugfix for ``utils.commons.send_request()``: Raise exception if error status
55
is returned in the response. (#491)
6+
- Update for ALMA Cycle 3 API change (#500)
67

78
0.2.3 (2014-09-30)
89
------------------

astroquery/alma/core.py

Lines changed: 111 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
import re
1212
import tarfile
13+
import string
1314
from bs4 import BeautifulSoup
1415

1516
from astropy.extern.six.moves.urllib_parse import urljoin
@@ -64,7 +65,7 @@ def query_object_async(self, object_name, cache=True, public=True,
6465

6566
if payload is None:
6667
payload = {}
67-
payload.update({'source_name_sesame': object_name,})
68+
payload.update({'source_name_resolver': object_name,})
6869

6970
return self.query_async(payload, cache=cache, public=public,
7071
science=science, **kwargs)
@@ -98,7 +99,7 @@ def query_region_async(self, coordinate, radius, cache=True, public=True,
9899

99100
if payload is None:
100101
payload = {}
101-
payload.update({'raDecCoordinates': rdc})
102+
payload.update({'ra_dec': rdc})
102103

103104
return self.query_async(payload, cache=cache, public=public,
104105
science=science, **kwargs)
@@ -121,13 +122,14 @@ def query_async(self, payload, cache=True, public=True, science=True):
121122
Return only data marked as "science" in the archive?
122123
"""
123124

124-
url = urljoin(self._get_dataarchive_url(), 'aq/search.votable')
125+
url = urljoin(self._get_dataarchive_url(), 'aq/')
125126

126-
payload.update({'viewFormat':'raw',})
127+
payload.update({'result_view':'raw', 'format':'VOTABLE',
128+
'download':'true'})
127129
if public:
128-
payload['publicFilterFlag'] = 'public'
130+
payload['public_data'] = 'public'
129131
if science:
130-
payload['scan_intent-asu'] = '=*TARGET*'
132+
payload['science_observations'] = '=%TARGET%'
131133

132134
self.validate_query(payload)
133135

@@ -380,7 +382,7 @@ def _parse_result(self, response, verbose=False):
380382
tf = six.BytesIO(response.content)
381383
vo_tree = votable.parse(tf, pedantic=False, invalid='mask')
382384
first_table = vo_tree.get_first_table()
383-
table = first_table.to_table()
385+
table = first_table.to_table(use_names_over_ids=True)
384386
return table
385387

386388
def _login(self, username, store_password=False):
@@ -613,10 +615,15 @@ def help(self, cache=True):
613615
print()
614616
print(title)
615617
for row in section:
616-
if len(row) == 2:
618+
if len(row) == 2: # text value
617619
name,payload_keyword = row
618620
print(" {0:33s}: {1:35s}".format(name,payload_keyword))
619-
elif len(row) == 4:
621+
#elif len(row) == 3: # radio button
622+
# name,payload_keyword,value = row
623+
# print(" {0:33s}: {1:20s} = {2:15s}".format(name,
624+
# payload_keyword,
625+
# value))
626+
elif len(row) == 4: # radio button or checkbox
620627
name,payload_keyword,checkbox,value = row
621628
print(" {2} {0:29s}: {1:20s} = {3:15s}".format(name,
622629
payload_keyword,
@@ -633,43 +640,74 @@ def _get_help_page(self, cache=True):
633640
root = BeautifulSoup(querypage.content)
634641
sections = root.findAll('td', class_='category')
635642

643+
whitespace = re.compile("\s+")
644+
636645
help_list = []
637646
for section in sections:
638647
title = section.find('div', class_='categorytitle').text.lstrip()
639648
help_section = (title,[])
640649
for inp in section.findAll('div', class_='inputdiv'):
641650
sp = inp.find('span')
642-
if sp is not None:
643-
payload_keyword = sp.attrs['class'][0]
644-
name = sp.text
645-
help_section[1].append((name,payload_keyword))
646-
else:
647-
buttons = inp.findAll('input')
648-
for b in buttons:
649-
payload_keyword = b.attrs['name']
650-
bid = b.attrs['id']
651-
label = inp.find('label')
652-
checked = b.attrs['checked'] == 'checked'
651+
buttons = inp.findAll('input')
652+
for b in buttons:
653+
# old version:for=id=rawView; name=viewFormat
654+
# new version:for=id=rawView; name=result_view
655+
payload_keyword = b.attrs['name']
656+
bid = b.attrs['id']
657+
label = inp.find('label')
658+
if sp is not None:
659+
name = whitespace.sub(" ", sp.text)
660+
elif label.attrs['for'] == bid:
661+
name = whitespace.sub(" ", label.text)
662+
else:
663+
raise TableParseError("ALMA query page has"
664+
" an unrecognized entry")
665+
if b.attrs['type'] == 'text':
666+
help_section[1].append((name, payload_keyword))
667+
elif b.attrs['type'] == 'radio':
653668
value = b.attrs['value']
654-
if label.attrs['for'] == bid:
655-
name = label.text
669+
if 'checked' in b.attrs:
670+
checked = b.attrs['checked'] == 'checked'
671+
checkbox = "(x)" if checked else "( )"
656672
else:
657-
raise TableParseError("ALMA query page has"
658-
" an unrecognized entry")
673+
checkbox = "( )"
674+
help_section[1].append((name, payload_keyword,
675+
checkbox, value))
676+
elif b.attrs['type'] == 'checkbox':
677+
checked = b.attrs['checked'] == 'checked'
678+
value = b.attrs['value']
659679
checkbox = "[x]" if checked else "[ ]"
660680
help_section[1].append((name, payload_keyword,
661681
checkbox, value))
682+
select = inp.find('select')
683+
if select is not None:
684+
options = [(filter_printable(option.text),
685+
option.attrs['value'])
686+
for option in select.findAll('option')]
687+
if sp is not None:
688+
name = whitespace.sub(" ", sp.text)
689+
else:
690+
name = select.attrs['name']
691+
option_str = " , ".join(["{0} = {1}".format(o[0],o[1])
692+
for o in options])
693+
help_section[1].append((name, option_str))
694+
695+
662696
help_list.append(help_section)
663697
self._help_list = help_list
664698

665699
return self._help_list
666700

667701
def _validate_payload(self, payload):
668702
if not hasattr(self, '_valid_params'):
669-
help_list = self._get_help_page()
703+
help_list = self._get_help_page(cache=False)
670704
self._valid_params = [row[1]
671705
for title,section in help_list
672706
for row in section]
707+
# These parameters are entirely hidden, but Felix says they are
708+
# allowed
709+
self._valid_params.append('download')
710+
self._valid_params.append('format')
673711
invalid_params = [k for k in payload if k not in self._valid_params]
674712
if len(invalid_params) > 0:
675713
raise InvalidQueryError("The following parameters are not accepted "
@@ -691,24 +729,26 @@ def _parse_staging_request_page(self, data_list_page):
691729

692730
root = BeautifulSoup(data_list_page.content, 'html5lib')
693731

694-
for link in root.findAll('a'):
695-
if 'script.sh' in link.text:
696-
download_script_url = urljoin(self.dataarchive_url,
697-
link['href'])
698-
699-
download_script = self._request('GET', download_script_url,
700-
cache=False)
701-
download_script_target_urls = []
702-
for line in download_script.text.split('\n'):
703-
if line and line.split() and line.split()[0] == 'wget':
704-
download_script_target_urls.append(line.split()[1].strip('"'))
705-
706-
if len(download_script_target_urls) == 0:
707-
raise RemoteServiceError("There was an error parsing the download "
708-
"script; it is empty. "
709-
"You can access the download script "
710-
"directly from this URL: "
711-
"{0}".format(download_script_url))
732+
#for link in root.findAll('a'):
733+
# if 'script.sh' in link.text:
734+
# download_script_url = urljoin(self.dataarchive_url,
735+
# link['href'])
736+
#if 'download_script_url' not in locals():
737+
# raise RemoteServiceError("No download links were found.")
738+
739+
#download_script = self._request('GET', download_script_url,
740+
# cache=False)
741+
#download_script_target_urls = []
742+
#for line in download_script.text.split('\n'):
743+
# if line and line.split() and line.split()[0] == 'wget':
744+
# download_script_target_urls.append(line.split()[1].strip('"'))
745+
746+
#if len(download_script_target_urls) == 0:
747+
# raise RemoteServiceError("There was an error parsing the download "
748+
# "script; it is empty. "
749+
# "You can access the download script "
750+
# "directly from this URL: "
751+
# "{0}".format(download_script_url))
712752

713753
data_table = root.findAll('table', class_='list', id='report')[0]
714754
columns = {'uid':[], 'URL':[], 'size':[]}
@@ -776,32 +816,31 @@ def _parse_staging_request_page(self, data_list_page):
776816

777817
if len(columns['uid']) == 0:
778818
raise RemoteServiceError("No valid UIDs were found in the staged "
779-
"data table. Please include {0} and {1}"
819+
"data table. Please include {0} "
780820
"in a bug report."
781-
.format(self._staging_log['data_list_url'],
782-
download_script_url))
783-
784-
if len(download_script_target_urls) != len(columns['URL']):
785-
log.warn("There was an error parsing the data staging page. "
786-
"The results from the page and the download script "
787-
"differ. You can access the download script directly "
788-
"from this URL: {0}".format(download_script_url))
789-
else:
790-
bad_urls = []
791-
for (rurl,url) in (zip(columns['URL'],
792-
download_script_target_urls)):
793-
if rurl == 'None_Found':
794-
url_uid = os.path.split(url)[-1]
795-
ind = np.where(np.array(columns['uid']) == url_uid)[0][0]
796-
columns['URL'][ind] = url
797-
elif rurl != url:
798-
bad_urls.append((rurl, url))
799-
if bad_urls:
800-
log.warn("There were mismatches between the parsed URLs "
801-
"from the staging page ({0}) and the download "
802-
"script ({1})."
803-
.format(self._staging_log['data_list_url'],
804-
download_script_url))
821+
.format(self._staging_log['data_list_url']))
822+
823+
#if len(download_script_target_urls) != len(columns['URL']):
824+
# log.warn("There was an error parsing the data staging page. "
825+
# "The results from the page and the download script "
826+
# "differ. You can access the download script directly "
827+
# "from this URL: {0}".format(download_script_url))
828+
#else:
829+
# bad_urls = []
830+
# for (rurl,url) in (zip(columns['URL'],
831+
# download_script_target_urls)):
832+
# if rurl == 'None_Found':
833+
# url_uid = os.path.split(url)[-1]
834+
# ind = np.where(np.array(columns['uid']) == url_uid)[0][0]
835+
# columns['URL'][ind] = url
836+
# elif rurl != url:
837+
# bad_urls.append((rurl, url))
838+
# if bad_urls:
839+
# log.warn("There were mismatches between the parsed URLs "
840+
# "from the staging page ({0}) and the download "
841+
# "script ({1})."
842+
# .format(self._staging_log['data_list_url'],
843+
# download_script_url))
805844

806845
tbl = Table([Column(name=k, data=v) for k,v in iteritems(columns)])
807846

@@ -833,6 +872,10 @@ def unique(seq):
833872
seen_add = seen.add
834873
return [x for x in seq if not (x in seen or seen_add(x))]
835874

875+
def filter_printable(s):
    """
    Extract the printable characters from a string.

    Parameters
    ----------
    s : str
        Input text, possibly containing characters that are not in
        ``string.printable`` (e.g. control characters or non-ASCII
        characters).

    Returns
    -------
    str
        ``s`` with every character not found in ``string.printable``
        removed; the order of the remaining characters is preserved.
    """
    # Join explicitly rather than returning ``filter(...)`` directly: on
    # Python 3 ``filter`` yields a lazy iterator, not a string, which would
    # be rendered as "<filter object ...>" when formatted into text.
    return "".join(ch for ch in s if ch in string.printable)
878+
836879
def parse_frequency_support(frequency_support_str):
837880
"""
838881
ALMA "Frequency Support" strings have the form:

0 commit comments

Comments
 (0)