1010import numpy as np
1111import re
1212import tarfile
13+ import string
1314from bs4 import BeautifulSoup
1415
1516from astropy .extern .six .moves .urllib_parse import urljoin
@@ -64,7 +65,7 @@ def query_object_async(self, object_name, cache=True, public=True,
6465
6566 if payload is None :
6667 payload = {}
67- payload .update ({'source_name_sesame ' : object_name ,})
68+ payload .update ({'source_name_resolver ' : object_name ,})
6869
6970 return self .query_async (payload , cache = cache , public = public ,
7071 science = science , ** kwargs )
@@ -98,7 +99,7 @@ def query_region_async(self, coordinate, radius, cache=True, public=True,
9899
99100 if payload is None :
100101 payload = {}
101- payload .update ({'raDecCoordinates ' : rdc })
102+ payload .update ({'ra_dec ' : rdc })
102103
103104 return self .query_async (payload , cache = cache , public = public ,
104105 science = science , ** kwargs )
@@ -121,13 +122,14 @@ def query_async(self, payload, cache=True, public=True, science=True):
121122 Return only data marked as "science" in the archive?
122123 """
123124
124- url = urljoin (self ._get_dataarchive_url (), 'aq/search.votable ' )
125+ url = urljoin (self ._get_dataarchive_url (), 'aq/' )
125126
126- payload .update ({'viewFormat' :'raw' ,})
127+ payload .update ({'result_view' :'raw' , 'format' :'VOTABLE' ,
128+ 'download' :'true' })
127129 if public :
128- payload ['publicFilterFlag ' ] = 'public'
130+ payload ['public_data ' ] = 'public'
129131 if science :
130- payload ['scan_intent-asu ' ] = '=* TARGET* '
132+ payload ['science_observations ' ] = '=% TARGET% '
131133
132134 self .validate_query (payload )
133135
@@ -380,7 +382,7 @@ def _parse_result(self, response, verbose=False):
380382 tf = six .BytesIO (response .content )
381383 vo_tree = votable .parse (tf , pedantic = False , invalid = 'mask' )
382384 first_table = vo_tree .get_first_table ()
383- table = first_table .to_table ()
385+ table = first_table .to_table (use_names_over_ids = True )
384386 return table
385387
386388 def _login (self , username , store_password = False ):
@@ -613,10 +615,15 @@ def help(self, cache=True):
613615 print ()
614616 print (title )
615617 for row in section :
616- if len (row ) == 2 :
618+ if len (row ) == 2 : # text value
617619 name ,payload_keyword = row
618620 print (" {0:33s}: {1:35s}" .format (name ,payload_keyword ))
619- elif len (row ) == 4 :
621+ #elif len(row) == 3: # radio button
622+ # name,payload_keyword,value = row
623+ # print(" {0:33s}: {1:20s} = {2:15s}".format(name,
624+ # payload_keyword,
625+ # value))
626+ elif len (row ) == 4 : # radio button or checkbox
620627 name ,payload_keyword ,checkbox ,value = row
621628 print (" {2} {0:29s}: {1:20s} = {3:15s}" .format (name ,
622629 payload_keyword ,
@@ -633,43 +640,74 @@ def _get_help_page(self, cache=True):
633640 root = BeautifulSoup (querypage .content )
634641 sections = root .findAll ('td' , class_ = 'category' )
635642
643+ whitespace = re .compile ("\s+" )
644+
636645 help_list = []
637646 for section in sections :
638647 title = section .find ('div' , class_ = 'categorytitle' ).text .lstrip ()
639648 help_section = (title ,[])
640649 for inp in section .findAll ('div' , class_ = 'inputdiv' ):
641650 sp = inp .find ('span' )
642- if sp is not None :
643- payload_keyword = sp .attrs ['class' ][0 ]
644- name = sp .text
645- help_section [1 ].append ((name ,payload_keyword ))
646- else :
647- buttons = inp .findAll ('input' )
648- for b in buttons :
649- payload_keyword = b .attrs ['name' ]
650- bid = b .attrs ['id' ]
651- label = inp .find ('label' )
652- checked = b .attrs ['checked' ] == 'checked'
651+ buttons = inp .findAll ('input' )
652+ for b in buttons :
653+ # old version:for=id=rawView; name=viewFormat
654+ # new version:for=id=rawView; name=result_view
655+ payload_keyword = b .attrs ['name' ]
656+ bid = b .attrs ['id' ]
657+ label = inp .find ('label' )
658+ if sp is not None :
659+ name = whitespace .sub (" " , sp .text )
660+ elif label .attrs ['for' ] == bid :
661+ name = whitespace .sub (" " , label .text )
662+ else :
663+ raise TableParseError ("ALMA query page has"
664+ " an unrecognized entry" )
665+ if b .attrs ['type' ] == 'text' :
666+ help_section [1 ].append ((name , payload_keyword ))
667+ elif b .attrs ['type' ] == 'radio' :
653668 value = b .attrs ['value' ]
654- if label .attrs ['for' ] == bid :
655- name = label .text
669+ if 'checked' in b .attrs :
670+ checked = b .attrs ['checked' ] == 'checked'
671+ checkbox = "(x)" if checked else "( )"
656672 else :
657- raise TableParseError ("ALMA query page has"
658- " an unrecognized entry" )
673+ checkbox = "( )"
674+ help_section [1 ].append ((name , payload_keyword ,
675+ checkbox , value ))
676+ elif b .attrs ['type' ] == 'checkbox' :
677+ checked = b .attrs ['checked' ] == 'checked'
678+ value = b .attrs ['value' ]
659679 checkbox = "[x]" if checked else "[ ]"
660680 help_section [1 ].append ((name , payload_keyword ,
661681 checkbox , value ))
682+ select = inp .find ('select' )
683+ if select is not None :
684+ options = [(filter_printable (option .text ),
685+ option .attrs ['value' ])
686+ for option in select .findAll ('option' )]
687+ if sp is not None :
688+ name = whitespace .sub (" " , sp .text )
689+ else :
690+ name = select .attrs ['name' ]
691+ option_str = " , " .join (["{0} = {1}" .format (o [0 ],o [1 ])
692+ for o in options ])
693+ help_section [1 ].append ((name , option_str ))
694+
695+
662696 help_list .append (help_section )
663697 self ._help_list = help_list
664698
665699 return self ._help_list
666700
667701 def _validate_payload (self , payload ):
668702 if not hasattr (self , '_valid_params' ):
669- help_list = self ._get_help_page ()
703+ help_list = self ._get_help_page (cache = False )
670704 self ._valid_params = [row [1 ]
671705 for title ,section in help_list
672706 for row in section ]
707+ # These parameters are entirely hidden, but Felix says they are
708+ # allowed
709+ self ._valid_params .append ('download' )
710+ self ._valid_params .append ('format' )
673711 invalid_params = [k for k in payload if k not in self ._valid_params ]
674712 if len (invalid_params ) > 0 :
675713 raise InvalidQueryError ("The following parameters are not accepted "
@@ -691,24 +729,26 @@ def _parse_staging_request_page(self, data_list_page):
691729
692730 root = BeautifulSoup (data_list_page .content , 'html5lib' )
693731
694- for link in root .findAll ('a' ):
695- if 'script.sh' in link .text :
696- download_script_url = urljoin (self .dataarchive_url ,
697- link ['href' ])
698-
699- download_script = self ._request ('GET' , download_script_url ,
700- cache = False )
701- download_script_target_urls = []
702- for line in download_script .text .split ('\n ' ):
703- if line and line .split () and line .split ()[0 ] == 'wget' :
704- download_script_target_urls .append (line .split ()[1 ].strip ('"' ))
705-
706- if len (download_script_target_urls ) == 0 :
707- raise RemoteServiceError ("There was an error parsing the download "
708- "script; it is empty. "
709- "You can access the download script "
710- "directly from this URL: "
711- "{0}" .format (download_script_url ))
732+ #for link in root.findAll('a'):
733+ # if 'script.sh' in link.text:
734+ # download_script_url = urljoin(self.dataarchive_url,
735+ # link['href'])
736+ #if 'download_script_url' not in locals():
737+ # raise RemoteServiceError("No download links were found.")
738+
739+ #download_script = self._request('GET', download_script_url,
740+ # cache=False)
741+ #download_script_target_urls = []
742+ #for line in download_script.text.split('\n'):
743+ # if line and line.split() and line.split()[0] == 'wget':
744+ # download_script_target_urls.append(line.split()[1].strip('"'))
745+
746+ #if len(download_script_target_urls) == 0:
747+ # raise RemoteServiceError("There was an error parsing the download "
748+ # "script; it is empty. "
749+ # "You can access the download script "
750+ # "directly from this URL: "
751+ # "{0}".format(download_script_url))
712752
713753 data_table = root .findAll ('table' , class_ = 'list' , id = 'report' )[0 ]
714754 columns = {'uid' :[], 'URL' :[], 'size' :[]}
@@ -776,32 +816,31 @@ def _parse_staging_request_page(self, data_list_page):
776816
777817 if len (columns ['uid' ]) == 0 :
778818 raise RemoteServiceError ("No valid UIDs were found in the staged "
779- "data table. Please include {0} and {1} "
819+ "data table. Please include {0} "
780820 "in a bug report."
781- .format (self ._staging_log ['data_list_url' ],
782- download_script_url ))
783-
784- if len (download_script_target_urls ) != len (columns ['URL' ]):
785- log .warn ("There was an error parsing the data staging page. "
786- "The results from the page and the download script "
787- "differ. You can access the download script directly "
788- "from this URL: {0}" .format (download_script_url ))
789- else :
790- bad_urls = []
791- for (rurl ,url ) in (zip (columns ['URL' ],
792- download_script_target_urls )):
793- if rurl == 'None_Found' :
794- url_uid = os .path .split (url )[- 1 ]
795- ind = np .where (np .array (columns ['uid' ]) == url_uid )[0 ][0 ]
796- columns ['URL' ][ind ] = url
797- elif rurl != url :
798- bad_urls .append ((rurl , url ))
799- if bad_urls :
800- log .warn ("There were mismatches between the parsed URLs "
801- "from the staging page ({0}) and the download "
802- "script ({1})."
803- .format (self ._staging_log ['data_list_url' ],
804- download_script_url ))
821+ .format (self ._staging_log ['data_list_url' ]))
822+
823+ #if len(download_script_target_urls) != len(columns['URL']):
824+ # log.warn("There was an error parsing the data staging page. "
825+ # "The results from the page and the download script "
826+ # "differ. You can access the download script directly "
827+ # "from this URL: {0}".format(download_script_url))
828+ #else:
829+ # bad_urls = []
830+ # for (rurl,url) in (zip(columns['URL'],
831+ # download_script_target_urls)):
832+ # if rurl == 'None_Found':
833+ # url_uid = os.path.split(url)[-1]
834+ # ind = np.where(np.array(columns['uid']) == url_uid)[0][0]
835+ # columns['URL'][ind] = url
836+ # elif rurl != url:
837+ # bad_urls.append((rurl, url))
838+ # if bad_urls:
839+ # log.warn("There were mismatches between the parsed URLs "
840+ # "from the staging page ({0}) and the download "
841+ # "script ({1})."
842+ # .format(self._staging_log['data_list_url'],
843+ # download_script_url))
805844
806845 tbl = Table ([Column (name = k , data = v ) for k ,v in iteritems (columns )])
807846
@@ -833,6 +872,10 @@ def unique(seq):
833872 seen_add = seen .add
834873 return [x for x in seq if not (x in seen or seen_add (x ))]
835874
def filter_printable(s):
    """Return a copy of ``s`` with non-printable characters removed.

    Parameters
    ----------
    s : str
        The string to be filtered.

    Returns
    -------
    str
        ``s`` restricted to characters in ``string.printable`` (digits,
        letters, punctuation, and whitespace).

    Notes
    -----
    The previous implementation returned ``filter(...)`` directly, which
    is a string only under Python 2; under Python 3 ``filter`` yields a
    lazy iterator, so callers formatting the result into a message would
    see ``<filter object ...>`` instead of text.  Joining the surviving
    characters produces a real string on both interpreters.
    """
    return ''.join(c for c in s if c in string.printable)
836879def parse_frequency_support (frequency_support_str ):
837880 """
838881 ALMA "Frequency Support" strings have the form:
0 commit comments