1212import warnings
1313import json
1414import time
15+ import string
1516import os
1617import re
1718import keyring
@@ -822,6 +823,7 @@ def __init__(self, *args, **kwargs):
822823 super (ObservationsClass , self ).__init__ (* args , ** kwargs )
823824
824825 self ._boto3 = None
826+ self ._botocore = None
825827
826828 def list_missions (self ):
827829 """
@@ -1303,7 +1305,9 @@ def enable_s3_hst_dataset(self):
13031305 Requires the boto3 library to function.
13041306 """
13051307 import boto3
1308+ import botocore
13061309 self ._boto3 = boto3
1310+ self ._botocore = botocore
13071311
13081312 log .info ("Using the S3 HST public dataset" )
13091313 log .warning ("Your AWS account will be charged for access to the S3 bucket" )
@@ -1316,6 +1320,7 @@ def disable_s3_hst_dataset(self):
13161320 Disables downloading HST public files from S3 instead of MAST
13171321 """
13181322 self ._boto3 = None
1323+ self ._botocore = None
13191324
13201325 def _download_from_s3 (self , dataProduct , localPath , cache = True ):
13211326 # The following is a mishmash of BaseQuery._download_file and s3 access through boto
@@ -1328,18 +1333,19 @@ def _download_from_s3(self, dataProduct, localPath, cache=True):
13281333 bkt = s3 .Bucket (bkt_name )
13291334
13301335 dataUri = dataProduct ['dataURI' ]
1336+ filename = dataUri .split ("/" )[- 1 ]
13311337 obs_id = dataProduct ['obs_id' ]
13321338
13331339 obs_id = obs_id .lower ()
1334-
1340+
13351341 # This next part is a bit funky. Let me explain why:
13361342 # We have 2 different possible URI schemes for HST:
13371343 # mast:HST/product/obs_id_filename.type (old style)
13381344 # mast:HST/product/obs_id/obs_id_filename.type (new style)
13391345 # The first scheme was developed thinking that the obs_id in the filename
13401346 # would *always* match the actual obs_id folder the file was placed in.
13411347 # Unfortunately this assumption was false.
1342- # We have been trying to switch to the new uri scheme as it specifies the
1348+ # We have been trying to switch to the new uri scheme as it specifies the
13431349 # obs_id used in the folder path correctly.
13441350 # The cherry on top is that the obs_id in the new style URI is not always correct either!
13451351 # When we are looking up files we have some code which iterates through all of
@@ -1348,23 +1354,38 @@ def _download_from_s3(self, dataProduct, localPath, cache=True):
13481354 # So in conclusion we can't trust the last char of the obs_id from the file or from the database
13491355 # So with that in mind, hold your nose when reading the following:
13501356
1351- # magic associations logic per Brian - 0-9/a-e we convert to 0
1352- magicValues = "123456789abcde"
1353- if obs_id [- 1 ] in magicValues :
1354- # We only replace the first occurrence in the folder
1355- # The filename remains with the original obs_id as part of the name
1356- new_obs_id = obs_id [:- 1 ] + "0"
1357- dataUri = dataUri .replace (obs_id , new_obs_id , 1 )
1358- obs_id = new_obs_id
1359- log .warning ("This data product's path may not have been properly identified %s" % dataUri )
1357+ info_lookup = None
1358+ sane_path = os .path .join ("hst" , "public" , obs_id [:4 ], obs_id , filename )
1359+ try :
1360+ info_lookup = s3_client .head_object (Bucket = bkt_name , Key = sane_path , RequestPayer = 'requester' )
1361+ bucketPath = sane_path
1362+ except self ._botocore .exceptions .ClientError as e :
1363+ if e .response ['Error' ]['Code' ] != "404" :
1364+ raise
1365+
1366+ if info_lookup is None :
1367+ # Unfortunately our file placement logic is anything but sane
1368+ # We put files in folders that don't make sense
1369+ for ch in (string .digits + string .ascii_lowercase ):
1370+ # The last char of the obs_folder (observation id) can be any lowercase or numeric char
1371+ insane_obs = obs_id [:- 1 ] + ch
1372+ insane_path = os .path .join ("hst" , "public" , insane_obs [:4 ], insane_obs , filename )
1373+
1374+ try :
1375+ info_lookup = s3_client .head_object (Bucket = bkt_name , Key = insane_path , RequestPayer = 'requester' )
1376+ bucketPath = insane_path
1377+ break
1378+ except self ._botocore .exceptions .ClientError as e :
1379+ if e .response ['Error' ]['Code' ] != "404" :
1380+ raise
13601381
1361- bucketPath = "hst/public/" + obs_id [:4 ] + dataUri .replace ("mast:HST/product" , "" )
1382+ if info_lookup is None :
1383+ raise Exception ("Unable to locate file!" )
13621384
13631385 # Unfortunately, we can't use the reported file size in the reported product. STScI's backing
13641386 # archive database (CAOM) is frequently out of date and in many cases omits the required information.
13651387 # length = dataProduct["size"]
13661388 # Instead we ask the webserver (in this case S3) what the expected content length is and use that.
1367- info_lookup = s3_client .head_object (Bucket = bkt_name , Key = bucketPath , RequestPayer = 'requester' )
13681389 length = info_lookup ["ContentLength" ]
13691390
13701391 if cache and os .path .exists (localPath ):
0 commit comments