
Commit b196e0d

Merge remote-tracking branch 'origin/50-clowder20-submit-file-to-extractor' into 50-clowder20-submit-file-to-extractor

2 parents a836d74 + f581ff5

7 files changed with 46 additions and 102 deletions


pyclowder/api/v1/datasets.py

Lines changed: 0 additions & 4 deletions

@@ -10,10 +10,6 @@
 from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
 from pyclowder.utils import StatusMessage
 
-from dotenv import load_dotenv
-load_dotenv()
-clowder_version = float(os.getenv('clowder_version'))
-
 
 def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None):
     """Create a new dataset in Clowder.

pyclowder/api/v2/files.py

Lines changed: 0 additions & 4 deletions

@@ -15,10 +15,6 @@
 from pyclowder.datasets import get_file_list
 from pyclowder.collections import get_datasets, get_child_collections
 
-from dotenv import load_dotenv
-load_dotenv()
-clowder_version = float(os.getenv('clowder_version'))
-
 # Some sources of urllib3 support warning suppression, but not all
 try:
     from urllib3 import disable_warnings

pyclowder/connectors.py

Lines changed: 25 additions & 66 deletions

@@ -55,11 +55,6 @@
 from email.mime.multipart import MIMEMultipart
 from string import Template
 
-from dotenv import load_dotenv
-load_dotenv()
-
-clowder_version = float(os.getenv('clowder_version', '1.0'))
-
 
 class Connector(object):
     """ Class that will listen for messages.
@@ -139,7 +134,7 @@ def alive(self):
         """Return whether connection is still alive or not."""
         return True
 
-    def _build_resource(self, body, host, secret_key):
+    def _build_resource(self, body, host, secret_key, clowder_version):
         """Examine message body and create resource object based on message type.
 
         Example FILE message -- *.file.#
@@ -242,7 +237,7 @@ def _build_resource(self, body, host, secret_key):
 
         elif resource_type == "file":
             ext = os.path.splitext(filename)[1]
-            if float(os.getenv('clowder_version')) == 2.0:
+            if clowder_version == 2:
                 return {
                     "type": "file",
                     "id": fileid,
@@ -407,22 +402,18 @@ def _process_message(self, body):
        if not host.endswith('/'): host += '/'
        secret_key = body.get('secretKey', '')
        retry_count = 0 if 'retry_count' not in body else body['retry_count']
-        resource = self._build_resource(body, host, secret_key)
+        clowder_version = int(body.get('clowderVersion', os.getenv('CLOWDER_VERSION', '1')))
+        resource = self._build_resource(body, host, secret_key, clowder_version)
        if not resource:
            logging.error("No resource found, this is bad.")
            return
 
        # register extractor
-        if clowder_version >= 2.0:
-            url = "%sapi/v2/extractors" % source_host
-        else:
+        if clowder_version != 2:
            url = "%sapi/extractors" % source_host
-        if url not in Connector.registered_clowder:
-            Connector.registered_clowder.append(url)
-            if clowder_version >= 2.0:
+            if url not in Connector.registered_clowder:
+                Connector.registered_clowder.append(url)
                self.register_extractor("%s?key=%s" % (url,secret_key))
-            else:
-                self.register_extractor("%s?key=%s" % (url, secret_key))
 
        # tell everybody we are starting to process the file
        self.status_update(pyclowder.utils.StatusMessage.start, resource, "Started processing.")
@@ -442,18 +433,10 @@ def _process_message(self, body):
                found_local = False
                try:
                    if check_result != pyclowder.utils.CheckMessage.bypass:
-                        if clowder_version >= 2.0:
-                            file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
-                        else:
-                            file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
+                        file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
                        file_path = self._check_for_local_file(file_metadata)
                        if not file_path:
-                            if clowder_version >= 2.0:
-                                file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
-                                                                     resource["intermediate_id"],
-                                                                     resource["file_ext"])
-                            else:
-                                file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
+                            file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
                                                                 resource["intermediate_id"],
                                                                 resource["file_ext"])
                else:
@@ -539,48 +522,24 @@ def register_extractor(self, endpoints):
        This assumes a file called extractor_info.json to be located in either the
        current working directory, or the folder where the main program is started.
        """
-        if clowder_version >= 2.0:
-            if not endpoints or endpoints == "":
-                return
-
-            logger = logging.getLogger(__name__)
-
-            headers = {'Content-Type': 'application/json'}
-            data = self.extractor_info
-
-            for url in endpoints.split(','):
-                if url not in Connector.registered_clowder:
-                    Connector.registered_clowder.append(url)
-                    try:
-                        result = requests.post(url.strip(), headers=headers,
-                                               data=json.dumps(data),
-                                               verify=self.ssl_verify)
-                        result.raise_for_status()
-                        logger.debug("Registering extractor with %s : %s", url, result.text)
-                    except Exception as exc:  # pylint: disable=broad-except
-                        logger.exception('Error in registering extractor: ' + str(exc))
-        else:
-            # don't do any work if we wont register the endpoint
-            if not endpoints or endpoints == "":
-                return
-
-            logger = logging.getLogger(__name__)
-
-            headers = {'Content-Type': 'application/json'}
-            data = self.extractor_info
+        if not endpoints or endpoints == "":
+            return
 
+        logger = logging.getLogger(__name__)
 
+        headers = {'Content-Type': 'application/json'}
+        data = self.extractor_info
 
-        for url in endpoints.split(','):
-            if url not in Connector.registered_clowder:
-                Connector.registered_clowder.append(url)
-                try:
-                    result = requests.post(url.strip(), headers=headers,
-                                           data=json.dumps(data),
-                                           verify=self.ssl_verify)
-                    result.raise_for_status()
-                    logger.debug("Registering extractor with %s : %s", url, result.text)
-                except Exception as exc:  # pylint: disable=broad-except
+        for url in endpoints.split(','):
+            if url not in Connector.registered_clowder:
+                Connector.registered_clowder.append(url)
+                try:
+                    result = requests.post(url.strip(), headers=headers,
+                                           data=json.dumps(data),
+                                           verify=self.ssl_verify)
+                    result.raise_for_status()
+                    logger.debug("Registering extractor with %s : %s", url, result.text)
+                except Exception as exc:  # pylint: disable=broad-except
                    logger.exception('Error in registering extractor: ' + str(exc))
 
    # pylint: disable=no-self-use
@@ -742,7 +701,7 @@ def connect(self):
 
        self.channel.queue_bind(queue=self.rabbitmq_queue,
                                exchange=self.rabbitmq_exchange,
-                                routing_key="extractors." + self.extractor_name)
+                                routing_key=self.extractor_name)
 
        # start the extractor announcer
        self.announcer = RabbitMQBroadcast(self.rabbitmq_uri, self.extractor_info, self.rabbitmq_queue, self.heartbeat)
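
The substantive change above: `clowder_version` is no longer a module constant loaded once from a `.env` file. `_process_message` now resolves it per message, preferring the `clowderVersion` field of the RabbitMQ body over the `CLOWDER_VERSION` environment variable, with Clowder v1 as the default, and passes it explicitly into `_build_resource`. A minimal sketch of that precedence (the helper name is hypothetical, extracted for illustration):

import os

def resolve_clowder_version(body):
    """Hypothetical helper mirroring the lookup added to _process_message:
    the message body wins, then the CLOWDER_VERSION environment variable,
    then a default of 1 (Clowder v1)."""
    return int(body.get('clowderVersion', os.getenv('CLOWDER_VERSION', '1')))

# Precedence, assuming CLOWDER_VERSION is not set in the environment:
print(resolve_clowder_version({'clowderVersion': 2}))  # -> 2, from the message
print(resolve_clowder_version({}))                     # -> 1, the default

The last hunk is related but separate: the queue is now bound with the bare extractor name as the routing key, dropping the old "extractors." prefix.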

pyclowder/datasets.py

Lines changed: 10 additions & 13 deletions

@@ -15,10 +15,7 @@
 from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
 from pyclowder.utils import StatusMessage
 
-from dotenv import load_dotenv
-load_dotenv()
-clowder_version = float(os.getenv('clowder_version', '1.0'))
-
+clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
 
 def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None):
     """Create a new dataset in Clowder.
@@ -32,7 +29,7 @@ def create_empty(connector, host, key, datasetname, description, parentid=None,
    parentid -- id of parent collection
    spaceid -- id of the space to add dataset to
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        datasetid = v2datasets.create_empty(connector, host, key, datasetname, description, parentid, spaceid)
    else:
        datasetid = v1datasets.create_empty(connector, host, key, datasetname, description, parentid, spaceid)
@@ -48,7 +45,7 @@ def delete(connector, host, key, datasetid):
    key -- the secret key to login to clowder
    datasetid -- the dataset to delete
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result = v2datasets.delete(connector, host, key, datasetid)
    else:
        result = v2datasets.delete(connector, host, key, datasetid)
@@ -90,7 +87,7 @@ def download(connector, host, key, datasetid):
    key -- the secret key to login to clowder
    datasetid -- the file that is currently being processed
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        zipfile = v2datasets.download(connector, host, key, datasetid)
    else:
        zipfile = v1datasets.download(connector, host, key, datasetid)
@@ -107,7 +104,7 @@ def download_metadata(connector, host, key, datasetid, extractor=None):
    datasetid -- the dataset to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result_json = v2datasets.download_metadata(connector, host, key, datasetid, extractor)
        return result_json
    else:
@@ -124,7 +121,7 @@ def get_info(connector, host, key, datasetid):
    key -- the secret key to login to clowder
    datasetid -- the dataset to get info of
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        info = v2datasets.get_info(connector, host, key, datasetid)
    else:
        info = v1datasets.get_info(connector, host, key, datasetid)
@@ -140,7 +137,7 @@ def get_file_list(connector, host, key, datasetid):
    key -- the secret key to login to clowder
    datasetid -- the dataset to get filelist of
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        file_list = v2datasets.get_file_list(connector, host, key, datasetid)
    else:
        file_list = v1datasets.get_file_list(connector, host, key, datasetid)
@@ -158,7 +155,7 @@ def remove_metadata(connector, host, key, datasetid, extractor=None):
    extractor -- extractor name to filter deletion
    !!! ALL JSON-LD METADATA WILL BE REMOVED IF NO extractor PROVIDED !!!
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        v2datasets.remove_metadata(connector, host, key, datasetid, extractor)
    else:
        v1datasets.remove_metadata(connector, host, key, datasetid, extractor)
@@ -174,7 +171,7 @@ def submit_extraction(connector, host, key, datasetid, extractorname):
    datasetid -- the dataset UUID to submit
    extractorname -- registered name of extractor to trigger
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result_status_code = v2datasets.submit_extraction(connector, host, key, datasetid, extractorname)
    else:
        result_status_code = v1datasets.submit_extraction(connector, host, key, datasetid, extractorname)
@@ -235,7 +232,7 @@ def upload_metadata(connector, host, key, datasetid, metadata):
    datasetid -- the dataset that is currently being processed
    metadata -- the metadata to be uploaded
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        v2datasets.upload_metadata(connector, host, key, datasetid, metadata)
    else:
        v1datasets.upload_metadata(connector, host, key, datasetid, metadata)
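
Every wrapper in datasets.py now follows the same shape: an exact `== 2` match on the integer version dispatches to the v2 API module, and anything else falls back to v1 (previously a `>= 2.0` comparison on a float). A runnable sketch of the pattern, with stand-in classes in place of the real pyclowder.api.v1.datasets / pyclowder.api.v2.datasets modules:

import os

clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))

# Stand-ins for the v1/v2 API modules, for illustration only:
class v1datasets:
    @staticmethod
    def get_info(connector, host, key, datasetid):
        return {"api": "v1", "id": datasetid}

class v2datasets:
    @staticmethod
    def get_info(connector, host, key, datasetid):
        return {"api": "v2", "id": datasetid}

def get_info(connector, host, key, datasetid):
    # Same shape as every wrapper in the diff: exact match on 2,
    # anything else falls through to the v1 implementation.
    if clowder_version == 2:
        return v2datasets.get_info(connector, host, key, datasetid)
    else:
        return v1datasets.get_info(connector, host, key, datasetid)

print(get_info(None, "http://localhost/", "key", "abc123"))  # v1 unless CLOWDER_VERSION=2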

pyclowder/extractors.py

Lines changed: 2 additions & 4 deletions

@@ -24,9 +24,7 @@
 import pyclowder.files
 import pyclowder.datasets
 
-from dotenv import load_dotenv
-load_dotenv()
-clowder_version = float(os.getenv('clowder_version', '1.0'))
+clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
 
 
 class Extractor(object):
@@ -266,7 +264,7 @@ def get_metadata(self, content, resource_type, resource_id, server=None):
        if not self._check_key(k, self.extractor_info['contexts']):
            logger.debug("Simple check could not find %s in contexts" % k)
        # TODO generate clowder2.0 extractor info
-        if clowder_version >= 2.0:
+        if clowder_version == 2:
            new_extractor_info = self._get_extractor_info_v2()
            md = dict()
            md["file_version"] = 1

pyclowder/files.py

Lines changed: 9 additions & 10 deletions

@@ -16,9 +16,8 @@
 from pyclowder.collections import get_datasets, get_child_collections
 import pyclowder.api.v2.files as v2files
 import pyclowder.api.v1.files as v1files
-from dotenv import load_dotenv
-load_dotenv()
-clowder_version = float(os.getenv('clowder_version', '1.0'))
+
+clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
 
 # Some sources of urllib3 support warning suppression, but not all
 try:
@@ -41,7 +40,7 @@ def download(connector, host, key, fileid, intermediatefileid=None, ext=""):
    intermediatefileid -- either same as fileid, or the intermediate file to be used
    ext -- the file extension, the downloaded file will end with this extension
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        inputfilename = v2files.download(connector, host, key, fileid, intermediatefileid, ext)
    else:
        inputfilename = v1files.download(connector, host, key, fileid, intermediatefileid, ext)
@@ -58,7 +57,7 @@ def download_info(connector, host, key, fileid):
    fileid -- the file to fetch metadata of
    """
 
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result = v2files.download_info(connector, host, key, fileid)
    else:
        result = v1files.download_info(connector, host, key, fileid)
@@ -75,7 +74,7 @@ def download_metadata(connector, host, key, fileid, extractor=None):
    fileid -- the file to fetch metadata of
    extractor -- extractor name to filter results (if only one extractor's metadata is desired)
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result = v2files.download_metadata(connector, host, key, fileid, extractor)
    else:
        result = v1files.download_metadata(connector, host, key, fileid, extractor)
@@ -92,7 +91,7 @@ def submit_extraction(connector, host, key, fileid, extractorname):
    fileid -- the file UUID to submit
    extractorname -- registered name of extractor to trigger
    """
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        result = v2files.submit_extraction(connector, host, key, fileid, extractorname)
    else:
        result = v1files.submit_extraction(connector, host, key, fileid, extractorname)
@@ -162,7 +161,7 @@ def upload_metadata(connector, host, key, fileid, metadata):
    metadata -- the metadata to be uploaded
    """
 
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        v2files.upload_metadata(connector, host, key, fileid, metadata)
    else:
        v1files.upload_metadata(connector, host, key, fileid, metadata)
@@ -277,7 +276,7 @@ def upload_to_dataset(connector, host, key, datasetid, filepath, check_duplicate
    check_duplicate -- check if filename already exists in dataset and skip upload if so
    """
 
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        v2files.upload_to_dataset(connector, host, key, datasetid, filepath, check_duplicate)
    else:
        logger = logging.getLogger(__name__)
@@ -322,7 +321,7 @@ def _upload_to_dataset_local(connector, host, key, datasetid, filepath):
    filepath -- path to file
    """
 
-    if clowder_version >= 2.0:
+    if clowder_version == 2:
        uploadedfileid = v2files._upload_to_dataset_local(connector, host, key, datasetid, filepath)
    else:
        uploadedfileid = v1files._upload_to_dataset_local(connector, host, key, datasetid, filepath)

requirements.txt

Lines changed: 0 additions & 1 deletion

@@ -22,4 +22,3 @@ requests-toolbelt==0.9.1
     # via pyclowder (setup.py)
 urllib3==1.26.8
     # via requests
-python-dotenv
