Skip to content

Commit 8838bb1

Browse files
committed
adding new methods for making pyclowder compatible with clowder v2
at different points in the code, the version will be checked different methods will use different endpoints and will use the bearer token instead of the key
1 parent 21a902a commit 8838bb1

File tree

3 files changed

+132
-17
lines changed

3 files changed

+132
-17
lines changed

pyclowder/connectors.py

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@
5555
from email.mime.multipart import MIMEMultipart
5656
from string import Template
5757

58+
from dotenv import load_dotenv
59+
load_dotenv()
60+
5861

5962
class Connector(object):
6063
""" Class that will listen for messages.
@@ -179,6 +182,8 @@ def _build_resource(self, body, host, secret_key):
179182
intermediatefileid = body.get('intermediateId', '')
180183
datasetid = body.get('datasetId', '')
181184
filename = body.get('filename', '')
185+
if float(os.getenv('clowder_version')) == 2.0:
186+
token = body.get('token', ' ')
182187

183188
# determine resource type; defaults to file
184189
resource_type = "file"
@@ -237,15 +242,27 @@ def _build_resource(self, body, host, secret_key):
237242

238243
elif resource_type == "file":
239244
ext = os.path.splitext(filename)[1]
240-
return {
241-
"type": "file",
242-
"id": fileid,
243-
"intermediate_id": intermediatefileid,
244-
"name": filename,
245-
"file_ext": ext,
246-
"parent": {"type": "dataset",
247-
"id": datasetid}
248-
}
245+
if float(os.getenv('clowder_version')) == 2.0:
246+
return {
247+
"type": "file",
248+
"id": fileid,
249+
"intermediate_id": intermediatefileid,
250+
"name": filename,
251+
"file_ext": ext,
252+
"token": token,
253+
"parent": {"type": "dataset",
254+
"id": datasetid}
255+
}
256+
else:
257+
return {
258+
"type": "file",
259+
"id": fileid,
260+
"intermediate_id": intermediatefileid,
261+
"name": filename,
262+
"file_ext": ext,
263+
"parent": {"type": "dataset",
264+
"id": datasetid}
265+
}
249266

250267
elif resource_type == "metadata":
251268
return {
@@ -390,17 +407,26 @@ def _process_message(self, body):
390407
if not source_host.endswith('/'): source_host += '/'
391408
if not host.endswith('/'): host += '/'
392409
secret_key = body.get('secretKey', '')
410+
token = body.get('token', ' ')
393411
retry_count = 0 if 'retry_count' not in body else body['retry_count']
394412
resource = self._build_resource(body, host, secret_key)
395413
if not resource:
396414
logging.error("No resource found, this is bad.")
397415
return
398416

399417
# register extractor
400-
url = "%sapi/extractors" % source_host
401-
if url not in Connector.registered_clowder:
402-
Connector.registered_clowder.append(url)
403-
self.register_extractor("%s?key=%s" % (url, secret_key))
418+
# TODO make work for clowder2.0
419+
if float(os.getenv('clowder_version')) == 2.0:
420+
print('do differently')
421+
registration_url = "%sapi/v2/extractors" % source_host
422+
if registration_url not in Connector.registered_clowder:
423+
Connector.registered_clowder.append(registration_url)
424+
self.register_extractor_v2(registration_url, token)
425+
else:
426+
url = "%sapi/extractors" % source_host
427+
if url not in Connector.registered_clowder:
428+
Connector.registered_clowder.append(url)
429+
self.register_extractor("%s?key=%s" % (url, secret_key))
404430

405431
# tell everybody we are starting to process the file
406432
self.status_update(pyclowder.utils.StatusMessage.start, resource, "Started processing.")
@@ -420,12 +446,20 @@ def _process_message(self, body):
420446
found_local = False
421447
try:
422448
if check_result != pyclowder.utils.CheckMessage.bypass:
423-
file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
449+
if float(os.getenv('clowder_version')) == 2.0:
450+
file_metadata = pyclowder.files.download_info_v2(self, host, token, resource["id"])
451+
else:
452+
file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
424453
file_path = self._check_for_local_file(file_metadata)
425454
if not file_path:
426-
file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
427-
resource["intermediate_id"],
428-
resource["file_ext"])
455+
if float(os.getenv('clowder_version')) == 2.0:
456+
file_path = pyclowder.files.download_v2(self, host, token, resource["id"],
457+
resource["intermediate_id"],
458+
resource["file_ext"])
459+
else:
460+
file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
461+
resource["intermediate_id"],
462+
resource["file_ext"])
429463
else:
430464
found_local = True
431465
resource['local_paths'] = [file_path]
@@ -531,6 +565,34 @@ def register_extractor(self, endpoints):
531565
except Exception as exc: # pylint: disable=broad-except
532566
logger.exception('Error in registering extractor: ' + str(exc))
533567

568+
def register_extractor_v2(self, endpoint, token):
569+
"""Register extractor info with Clowder.
570+
571+
This assumes a file called extractor_info.json to be located in either the
572+
current working directory, or the folder where the main program is started.
573+
"""
574+
575+
# don't do any work if we wont register the endpoint
576+
if not endpoint or endpoint == "":
577+
return
578+
579+
logger = logging.getLogger(__name__)
580+
581+
headers = {'Content-Type': 'application/json',
582+
'Authorization': 'Bearer ' + token}
583+
data = self.extractor_info
584+
585+
if endpoint not in Connector.registered_clowder:
586+
Connector.registered_clowder.append(endpoint)
587+
try:
588+
result = requests.post(endpoint.strip(), headers=headers,
589+
data=json.dumps(data),
590+
verify=self.ssl_verify)
591+
result.raise_for_status()
592+
logger.debug("Registering extractor with %s : %s", url, result.text)
593+
except Exception as exc: # pylint: disable=broad-except
594+
logger.exception('Error in registering extractor: ' + str(exc))
595+
534596
# pylint: disable=no-self-use
535597
def status_update(self, status, resource, message):
536598
"""Sends a status message.

pyclowder/files.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,40 @@ def download(connector, host, key, fileid, intermediatefileid=None, ext=""):
5858
raise
5959

6060

61+
# pylint: disable=too-many-arguments
62+
def download_v2(connector, host, token, fileid, intermediatefileid=None, ext=""):
63+
"""Download file to be processed from Clowder.
64+
65+
Keyword arguments:
66+
connector -- connector information, used to get missing parameters and send status updates
67+
host -- the clowder host, including http and port, should end with a /
68+
key -- the secret key to login to clowder
69+
fileid -- the file that is currently being processed
70+
intermediatefileid -- either same as fileid, or the intermediate file to be used
71+
ext -- the file extension, the downloaded file will end with this extension
72+
"""
73+
74+
connector.message_process({"type": "file", "id": fileid}, "Downloading file.")
75+
76+
# TODO: intermediateid doesn't really seem to be used here, can we remove entirely?
77+
if not intermediatefileid:
78+
intermediatefileid = fileid
79+
80+
url = '%sapi/v2/files/%s' % (host, intermediatefileid)
81+
headers = {"Authorization": "Bearer " + token}
82+
result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)
83+
84+
(inputfile, inputfilename) = tempfile.mkstemp(suffix=ext)
85+
86+
try:
87+
with os.fdopen(inputfile, "wb") as outputfile:
88+
for chunk in result.iter_content(chunk_size=10*1024):
89+
outputfile.write(chunk)
90+
return inputfilename
91+
except Exception:
92+
os.remove(inputfilename)
93+
raise
94+
6195
def download_info(connector, host, key, fileid):
6296
"""Download file summary metadata from Clowder.
6397
@@ -69,12 +103,30 @@ def download_info(connector, host, key, fileid):
69103
"""
70104

71105
url = '%sapi/files/%s/metadata?key=%s' % (host, fileid, key)
106+
headers = {"Authorization": "Bearer " + token}
72107

73108
# fetch data
74109
result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)
75110

76111
return result.json()
77112

113+
def download_info_v2(connector, host, token, fileid):
114+
"""Download file summary metadata from Clowder.
115+
116+
Keyword arguments:
117+
connector -- connector information, used to get missing parameters and send status updates
118+
host -- the clowder host, including http and port, should end with a /
119+
key -- the secret key to login to clowder
120+
fileid -- the file to fetch metadata of
121+
"""
122+
123+
url = '%sapi/v2/files/%s/metadata' % (host, fileid)
124+
headers = {"Authorization": "Bearer " + token}
125+
# fetch data
126+
result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True, headers=headers)
127+
128+
return result.json()
129+
78130

79131
def download_metadata(connector, host, key, fileid, extractor=None):
80132
"""Download file JSON-LD metadata from Clowder.

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ requests-toolbelt==0.9.1
2222
# via pyclowder (setup.py)
2323
urllib3==1.26.8
2424
# via requests
25+
dotenv

0 commit comments

Comments
 (0)