
Commit b80eb33

Author: Max Burnette

improve error handling, make message and type separate

1 parent 95d80d1 commit b80eb33

File tree: 7 files changed, +80 -76 lines

pyclowder/collections.py

Lines changed: 1 addition & 3 deletions
@@ -8,7 +8,6 @@
 import requests

 from pyclowder.client import ClowderClient
-from pyclowder.utils import StatusMessage


 def create_empty(connector, host, key, collectionname, description, parentid=None, spaceid=None):
@@ -121,8 +120,7 @@ def upload_preview(connector, host, key, collectionid, previewfile, previewmetad
         section this preview should be associated with.
     """

-    connector.status_update(StatusMessage.processing, {"type": "collection", "id": collectionid},
-                            "Uploading collection preview.")
+    connector.message_process({"type": "collection", "id": collectionid}, "Uploading collection preview.")

     logger = logging.getLogger(__name__)
     headers = {'Content-Type': 'application/json'}

pyclowder/connectors.py

Lines changed: 57 additions & 54 deletions
@@ -230,7 +230,6 @@ def _build_resource(self, body, host, secret_key):
                 "type": "dataset",
                 "id": datasetid
             }
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, msg)
             self.message_error(resource)
             return None

@@ -392,7 +391,7 @@ def _process_message(self, body):
         self.register_extractor("%s?key=%s" % (url, secret_key))

         # tell everybody we are starting to process the file
-        self.status_update(pyclowder.utils.StatusMessage.start, resource, "Started processing")
+        self.status_update(pyclowder.utils.StatusMessage.start.value, resource, "Started processing.")

         # checks whether to process the file in this message or not
         # pylint: disable=too-many-nested-blocks
@@ -456,41 +455,37 @@ def _process_message(self, body):
                         logger.exception("Error removing temporary dataset directory")

             else:
-                self.status_update(pyclowder.utils.StatusMessage.processing, resource, "Skipped in check_message")
+                self.status_update(pyclowder.utils.StatusMessage.skip.value, resource, "Skipped in check_message")

             self.message_ok(resource)

         except SystemExit as exc:
-            status = "sys.exit : " + str(exc)
-            logger.exception("[%s] %s", resource['id'], status)
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
-            self.message_resubmit(resource, retry_count)
+            message = str.format("sys.exit: {}", str(exc))
+            logger.exception("[%s] %s", resource['id'], message)
+            self.message_resubmit(resource, retry_count, message)
             raise
         except KeyboardInterrupt:
-            status = "keyboard interrupt"
-            logger.exception("[%s] %s", resource['id'], status)
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
-            self.message_resubmit(resource, retry_count)
+            message = "keyboard interrupt"
+            logger.exception("[%s] %s", resource['id'], message)
+            self.message_resubmit(resource, retry_count, message)
             raise
         except GeneratorExit:
-            status = "generator exit"
-            logger.exception("[%s] %s", resource['id'], status)
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
-            self.message_resubmit(resource, retry_count)
+            message = "generator exit"
+            logger.exception("[%s] %s", resource['id'], message)
+            self.message_resubmit(resource, retry_count, message)
             raise
         except subprocess.CalledProcessError as exc:
-            status = str.format("Error processing [exit code={}]\n{}", exc.returncode, exc.output)
-            logger.exception("[%s] %s", resource['id'], status)
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
-            self.message_error(resource)
+            message = str.format("Error in subprocess [exit code={}]:\n{}", exc.returncode, exc.output)
+            logger.exception("[%s] %s", resource['id'], message)
+            self.message_error(resource, message)
         except Exception as exc: # pylint: disable=broad-except
-            status = "Error processing : " + str(exc)
-            logger.exception("[%s] %s", resource['id'], status)
-            self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
+            message = str(exc)
+            logger.exception("[%s] %s", resource['id'], message)
             if retry_count < 10:
-                self.message_resubmit(resource, retry_count + 1)
+                message = "(#%s) %s" % (retry_count+1, message)
+                self.message_resubmit(resource, retry_count+1, message)
             else:
-                self.message_error(resource)
+                self.message_error(resource, message)

     def register_extractor(self, endpoints):
         """Register extractor info with Clowder.
@@ -528,21 +523,23 @@ def status_update(self, status, resource, message):
         the instance know the progress of the extractor.

         Keyword arguments:
-        status - START | PROCESSING | DONE | ERROR
+        status - pyclowder.utils.StatusMessage value
         resource - descriptor object with {"type", "id"} fields
         message - contents of the status update
         """
         logging.getLogger(__name__).info("[%s] : %s: %s", resource["id"], status, message)

-    def message_ok(self, resource):
-        self.status_update(pyclowder.utils.StatusMessage.done, resource, "Done processing")
+    def message_ok(self, resource, message="Done processing."):
+        self.status_update(pyclowder.utils.StatusMessage.done.value, resource, message)
+
+    def message_error(self, resource, message="Error processing message."):
+        self.status_update(pyclowder.utils.StatusMessage.error.value, resource, message)

-    def message_error(self, resource):
-        self.status_update(pyclowder.utils.StatusMessage.error, resource, "Error processing message")
+    def message_resubmit(self, resource, retry_count, message="Resubmitting message."):
+        self.status_update(pyclowder.utils.StatusMessage.retry.value, resource, message)

-    def message_resubmit(self, resource, retry_count):
-        self.status_update(pyclowder.utils.StatusMessage.processing, resource, "Resubmitting message (attempt #%s)"
-                           % retry_count)
+    def message_process(self, resource, message):
+        self.status_update(pyclowder.utils.StatusMessage.processing.value, resource, message)

     def get(self, url, params=None, raise_status=True, **kwargs):
         """
@@ -871,19 +868,22 @@ def process_messages(self, channel, rabbitmq_queue):
             with self.lock:
                 msg = self.messages.pop(0)

+            # PROCESSING - Standard update message during extractor processing
             if msg["type"] == 'status':
                 if self.header.reply_to:
                     properties = pika.BasicProperties(delivery_mode=2, correlation_id=self.header.correlation_id)
                     channel.basic_publish(exchange='',
                                           routing_key=self.header.reply_to,
                                           properties=properties,
-                                          body=json.dumps(msg['status']))
+                                          body=json.dumps(msg['payload']))

+            # DONE - Extractor finished without error
             elif msg["type"] == 'ok':
                 channel.basic_ack(self.method.delivery_tag)
                 with self.lock:
                     self.finished = True

+            # ERROR - Extractor encountered error and message goes to error queue
             elif msg["type"] == 'error':
                 properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
                 channel.basic_publish(exchange='',
@@ -894,18 +894,18 @@ def process_messages(self, channel, rabbitmq_queue):
                 with self.lock:
                     self.finished = True

+            # RESUBMITTING - Extractor encountered error and message is resubmitted to same queue
             elif msg["type"] == 'resubmit':
-                retry_count = msg['retry_count']
-                queue = rabbitmq_queue
-                properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
                 jbody = json.loads(self.body)
-                jbody['retry_count'] = retry_count
+                jbody['retry_count'] = msg['retry_count']
                 if 'exchange' not in jbody and self.method.exchange:
                     jbody['exchange'] = self.method.exchange
-                if 'routing_key' not in jbody and self.method.routing_key and self.method.routing_key != queue:
+                if 'routing_key' not in jbody and self.method.routing_key and self.method.routing_key != rabbitmq_queue:
                     jbody['routing_key'] = self.method.routing_key
+
+                properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
                 channel.basic_publish(exchange='',
-                                      routing_key=queue,
+                                      routing_key=rabbitmq_queue,
                                       properties=properties,
                                       body=json.dumps(jbody))
                 channel.basic_ack(self.method.delivery_tag)
@@ -917,30 +917,33 @@ def process_messages(self, channel, rabbitmq_queue):

     def status_update(self, status, resource, message):
         super(RabbitMQHandler, self).status_update(status, resource, message)
-        status_report = dict()
-        # TODO: Update this to check resource["type"] once Clowder better supports dataset events
-        status_report['file_id'] = resource["id"]
-        status_report['extractor_id'] = self.extractor_info['name']
-        status_report['status'] = "%s: %s" % (status, message)
-        status_report['start'] = pyclowder.utils.iso8601time()
         with self.lock:
+            # TODO: Remove 'status' from payload later and read from message_type and message in Clowder 2.0
             self.messages.append({"type": "status",
-                                  "status": status_report,
                                   "resource": resource,
-                                  "message": message})
-
-    def message_ok(self, resource):
-        super(RabbitMQHandler, self).message_ok(resource)
+                                  "payload": {
+                                      "file_id": resource["id"],
+                                      "extractor_id": self.extractor_info['name'],
+                                      "status": "%s: %s" % (status, message),
+                                      "start": pyclowder.utils.iso8601time(),
+                                      "message_type": status,
+                                      "message": message
+                                  }})
+
+    def message_ok(self, resource, message="Done processing."):
+        super(RabbitMQHandler, self).message_ok(resource, message)
         with self.lock:
            self.messages.append({"type": "ok"})

-    def message_error(self, resource):
-        super(RabbitMQHandler, self).message_error(resource)
+    def message_error(self, resource, message="Error processing message."):
+        super(RabbitMQHandler, self).message_error(resource, message)
         with self.lock:
             self.messages.append({"type": "error"})

-    def message_resubmit(self, resource, retry_count):
-        super(RabbitMQHandler, self).message_resubmit(resource, retry_count)
+    def message_resubmit(self, resource, retry_count, message=None):
+        if message is None:
+            message = "(#%s)" % retry_count
+        super(RabbitMQHandler, self).message_resubmit(resource, retry_count, message)
         with self.lock:
             self.messages.append({"type": "resubmit", "retry_count": retry_count})
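For orientation, a minimal sketch (not part of this commit) of how an extractor built against this branch would report progress with the renamed helpers; the class name and messages are illustrative, and pyclowder.extractors.Extractor is assumed as the base class, while the process_message() signature and the connector methods come from the diffs above.

    import pyclowder.extractors


    class MyExtractor(pyclowder.extractors.Extractor):
        def process_message(self, connector, host, secret_key, resource, parameters):
            # PROCESSING updates, surfaced in the Clowder UI while the extractor runs
            connector.message_process(resource, "Starting analysis.")
            # ... the extractor's real work would happen here ...
            connector.message_process(resource, "Analysis complete, uploading results.")

The SUCCEEDED, ERROR, and RESUBMITTED updates are emitted by _process_message() itself through message_ok(), message_error(), and message_resubmit(), so an extractor normally only calls message_process() directly.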

pyclowder/datasets.py

Lines changed: 2 additions & 4 deletions
@@ -12,7 +12,6 @@

 from pyclowder.client import ClowderClient
 from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
-from pyclowder.utils import StatusMessage


 def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None):
@@ -113,7 +112,7 @@ def download(connector, host, key, datasetid):
     datasetid -- the file that is currently being processed
     """

-    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid}, "Downloading dataset.")
+    connector.message_process({"type": "dataset", "id": datasetid}, "Downloading dataset.")

     # fetch dataset zipfile
     url = '%sapi/datasets/%s/download?key=%s' % (host, datasetid, key)
@@ -268,8 +267,7 @@ def upload_metadata(connector, host, key, datasetid, metadata):
     metadata -- the metadata to be uploaded
     """

-    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid},
-                            "Uploading dataset metadata.")
+    connector.message_process({"type": "dataset", "id": datasetid}, "Uploading dataset metadata.")

     headers = {'Content-Type': 'application/json'}
     url = '%sapi/datasets/%s/metadata.jsonld?key=%s' % (host, datasetid, key)

pyclowder/files.py

Lines changed: 4 additions & 5 deletions
@@ -14,7 +14,6 @@

 from pyclowder.datasets import get_file_list
 from pyclowder.collections import get_datasets, get_child_collections
-from pyclowder.utils import StatusMessage

 # Some sources of urllib3 support warning suppression, but not all
 try:
@@ -38,7 +37,7 @@ def download(connector, host, key, fileid, intermediatefileid=None, ext=""):
     ext -- the file extension, the downloaded file will end with this extension
     """

-    connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Downloading file.")
+    connector.message_process({"type": "file", "id": fileid}, "Downloading file.")

     # TODO: intermediateid doesn't really seem to be used here, can we remove entirely?
     if not intermediatefileid:
@@ -180,7 +179,7 @@ def upload_metadata(connector, host, key, fileid, metadata):
     metadata -- the metadata to be uploaded
     """

-    connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file metadata.")
+    connector.message_process({"type": "file", "id": fileid}, "Uploading file metadata.")

     headers = {'Content-Type': 'application/json'}
     url = '%sapi/files/%s/metadata.jsonld?key=%s' % (host, fileid, key)
@@ -204,7 +203,7 @@ def upload_preview(connector, host, key, fileid, previewfile, previewmetadata=No
         file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
     """

-    connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file preview.")
+    connector.message_process({"type": "file", "id": fileid}, "Uploading file preview.")

     logger = logging.getLogger(__name__)
     headers = {'Content-Type': 'application/json'}
@@ -248,7 +247,7 @@ def upload_tags(connector, host, key, fileid, tags):
     tags -- the tags to be uploaded
     """

-    connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file tags.")
+    connector.message_process({"type": "file", "id": fileid}, "Uploading file tags.")

     headers = {'Content-Type': 'application/json'}
     url = '%sapi/files/%s/tags?key=%s' % (host, fileid, key)
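Because these helpers now call connector.message_process() internally, any extractor that uses them gets a PROCESSING update in the Clowder UI for free. A small hedged sketch (the wrapper function is hypothetical; download()'s signature is taken from the hunk header above):

    import pyclowder.files


    def fetch_input(connector, host, secret_key, fileid):
        # Emits a "Downloading file." PROCESSING update before fetching the bytes,
        # then returns the path of the downloaded temporary file.
        return pyclowder.files.download(connector, host, secret_key, fileid)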

pyclowder/sections.py

Lines changed: 2 additions & 4 deletions
@@ -8,8 +8,6 @@

 import requests

-from pyclowder.utils import StatusMessage
-

 def upload(connector, host, key, sectiondata):
     """Upload section to Clowder.
@@ -47,7 +45,7 @@ def upload_tags(connector, host, key, sectionid, tags):
     tags -- the tags to be uploaded
     """

-    connector.status_update(StatusMessage.processing, {"type": "section", "id": sectionid}, "Uploading section tags.")
+    connector.message_process({"type": "section", "id": sectionid}, "Uploading section tags.")

     headers = {'Content-Type': 'application/json'}
     url = '%sapi/sections/%s/tags?key=%s' % (host, sectionid, key)
@@ -67,7 +65,7 @@ def upload_description(connector, host, key, sectionid, description):
     description -- the description to be uploaded
     """

-    connector.status_update(StatusMessage.processing, {"type": "section", "id": sectionid},
+    connector.message_process({"type": "section", "id": sectionid},
                             "Uploading section description.")

     headers = {'Content-Type': 'application/json'}

pyclowder/utils.py

Lines changed: 4 additions & 2 deletions
@@ -44,10 +44,12 @@ class StatusMessage(Enum):
     full string will be STATUS: MESSAGE.
     """

-    start = "START"
+    start = "STARTED"
     processing = "PROCESSING"
-    done = "DONE"
+    done = "SUCCEEDED"
+    skip = "SKIPPED"
     error = "ERROR"
+    retry = "RESUBMITTED"


 def iso8601time():
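A quick sketch of the widened enum as it reads after this change; the connectors above now pass the .value strings to status_update() rather than the enum members themselves:

    from enum import Enum


    class StatusMessage(Enum):
        # values as defined in pyclowder/utils.py after this commit
        start = "STARTED"
        processing = "PROCESSING"
        done = "SUCCEEDED"
        skip = "SKIPPED"
        error = "ERROR"
        retry = "RESUBMITTED"


    print(StatusMessage.start.value)   # STARTED
    print(StatusMessage.retry.value)   # RESUBMITTED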

sample-extractors/wordcount/wordcount.py

Lines changed: 10 additions & 4 deletions
@@ -32,24 +32,30 @@ def process_message(self, connector, host, secret_key, resource, parameters):
         inputfile = resource["local_paths"][0]
         file_id = resource['id']

-        # call actual program
+        # These process messages will appear in the Clowder UI under Extractions.
+        connector.message_process(resource, "Loading contents of file...")
+
+        # Call actual program
         result = subprocess.check_output(['wc', inputfile], stderr=subprocess.STDOUT)
         result = result.decode('utf-8')
         (lines, words, characters, _) = result.split()

-        # store results as metadata
+        connector.message_process(resource, "Found %s lines and %s words..." % (lines, words))
+
+        # Store results as metadata
         result = {
             'lines': lines,
             'words': words,
             'characters': characters
         }
         metadata = self.get_metadata(result, 'file', file_id, host)
+
+        # Normal logs will appear in the extractor log, but NOT in the Clowder UI.
         logger.debug(metadata)

-        # upload metadata
+        # Upload metadata to original file
         pyclowder.files.upload_metadata(connector, host, secret_key, file_id, metadata)

-
 if __name__ == "__main__":
     extractor = WordCount()
     extractor.start()
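For reference, a sketch of what one of the message_process() calls above ends up publishing on the reply queue via RabbitMQHandler.status_update(); the field names mirror the payload built in connectors.py, while the IDs and timestamp below are invented for illustration:

    import json

    status = "PROCESSING"                        # StatusMessage.processing.value
    message = "Found 10 lines and 120 words..."
    payload = {
        "file_id": "5c8f2a0e4f0c1a2b3c4d5e6f",   # resource["id"] (placeholder)
        "extractor_id": "wordcount",             # self.extractor_info['name']
        "status": "%s: %s" % (status, message),  # legacy combined field, kept until Clowder 2.0
        "start": "2019-02-21T16:30:00Z",         # pyclowder.utils.iso8601time() (example value)
        "message_type": status,                  # the type and the human-readable message
        "message": message                       # are now carried separately
    }
    print(json.dumps(payload, indent=2))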
