Skip to content

Commit b9738cb

Browse files
authored
Merge pull request #2 from clowder-framework/separate-extractor-message-and-type
improve error handling, make extractor message and type separate
2 parents 7190785 + 2cf2d1a commit b9738cb

File tree

7 files changed

+98
-78
lines changed

7 files changed

+98
-78
lines changed

pyclowder/collections.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import requests
99

1010
from pyclowder.client import ClowderClient
11-
from pyclowder.utils import StatusMessage
1211

1312

1413
def create_empty(connector, host, key, collectionname, description, parentid=None, spaceid=None):
@@ -121,8 +120,7 @@ def upload_preview(connector, host, key, collectionid, previewfile, previewmetad
121120
section this preview should be associated with.
122121
"""
123122

124-
connector.status_update(StatusMessage.processing, {"type": "collection", "id": collectionid},
125-
"Uploading collection preview.")
123+
connector.message_process({"type": "collection", "id": collectionid}, "Uploading collection preview.")
126124

127125
logger = logging.getLogger(__name__)
128126
headers = {'Content-Type': 'application/json'}

pyclowder/connectors.py

Lines changed: 74 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ def _build_resource(self, body, host, secret_key):
230230
"type": "dataset",
231231
"id": datasetid
232232
}
233-
self.status_update(pyclowder.utils.StatusMessage.error, resource, msg)
234233
self.message_error(resource)
235234
return None
236235

@@ -392,7 +391,7 @@ def _process_message(self, body):
392391
self.register_extractor("%s?key=%s" % (url, secret_key))
393392

394393
# tell everybody we are starting to process the file
395-
self.status_update(pyclowder.utils.StatusMessage.start, resource, "Started processing")
394+
self.status_update(pyclowder.utils.StatusMessage.start.value, resource, "Started processing.")
396395

397396
# checks whether to process the file in this message or not
398397
# pylint: disable=too-many-nested-blocks
@@ -456,41 +455,41 @@ def _process_message(self, body):
456455
logger.exception("Error removing temporary dataset directory")
457456

458457
else:
459-
self.status_update(pyclowder.utils.StatusMessage.processing, resource, "Skipped in check_message")
458+
self.status_update(pyclowder.utils.StatusMessage.skip.value, resource, "Skipped in check_message")
460459

461460
self.message_ok(resource)
462461

463462
except SystemExit as exc:
464-
status = "sys.exit : " + str(exc)
465-
logger.exception("[%s] %s", resource['id'], status)
466-
self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
467-
self.message_resubmit(resource, retry_count)
463+
message = str.format("sys.exit: {}", str(exc))
464+
logger.exception("[%s] %s", resource['id'], message)
465+
self.message_resubmit(resource, retry_count, message)
468466
raise
469467
except KeyboardInterrupt:
470-
status = "keyboard interrupt"
471-
logger.exception("[%s] %s", resource['id'], status)
472-
self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
473-
self.message_resubmit(resource, retry_count)
468+
message = "keyboard interrupt"
469+
logger.exception("[%s] %s", resource['id'], message)
470+
self.message_resubmit(resource, retry_count, message)
474471
raise
475472
except GeneratorExit:
476-
status = "generator exit"
477-
logger.exception("[%s] %s", resource['id'], status)
478-
self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
479-
self.message_resubmit(resource, retry_count)
473+
message = "generator exit"
474+
logger.exception("[%s] %s", resource['id'], message)
475+
self.message_resubmit(resource, retry_count, message)
480476
raise
481477
except subprocess.CalledProcessError as exc:
482-
status = str.format("Error processing [exit code={}]\n{}", exc.returncode, exc.output)
483-
logger.exception("[%s] %s", resource['id'], status)
484-
self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
485-
self.message_error(resource)
478+
message = str.format("Error in subprocess [exit code={}]:\n{}", exc.returncode, exc.output)
479+
logger.exception("[%s] %s", resource['id'], message)
480+
self.message_error(resource, message)
481+
except PyClowderExtractionAbort as exc:
482+
message = str.format("Aborting message: {}", exc.message)
483+
logger.exception("[%s] %s", resource['id'], message)
484+
self.message_error(resource, message)
486485
except Exception as exc: # pylint: disable=broad-except
487-
status = "Error processing : " + str(exc)
488-
logger.exception("[%s] %s", resource['id'], status)
489-
self.status_update(pyclowder.utils.StatusMessage.error, resource, status)
486+
message = str(exc)
487+
logger.exception("[%s] %s", resource['id'], message)
490488
if retry_count < 10:
491-
self.message_resubmit(resource, retry_count + 1)
489+
message = "(#%s) %s" % (retry_count+1, message)
490+
self.message_resubmit(resource, retry_count+1, message)
492491
else:
493-
self.message_error(resource)
492+
self.message_error(resource, message)
494493

495494
def register_extractor(self, endpoints):
496495
"""Register extractor info with Clowder.
@@ -528,21 +527,23 @@ def status_update(self, status, resource, message):
528527
the instance know the progress of the extractor.
529528
530529
Keyword arguments:
531-
status - START | PROCESSING | DONE | ERROR
530+
status - pyclowder.utils.StatusMessage value
532531
resource - descriptor object with {"type", "id"} fields
533532
message - contents of the status update
534533
"""
535534
logging.getLogger(__name__).info("[%s] : %s: %s", resource["id"], status, message)
536535

537-
def message_ok(self, resource):
538-
self.status_update(pyclowder.utils.StatusMessage.done, resource, "Done processing")
536+
def message_ok(self, resource, message="Done processing."):
537+
self.status_update(pyclowder.utils.StatusMessage.done.value, resource, message)
538+
539+
def message_error(self, resource, message="Error processing message."):
540+
self.status_update(pyclowder.utils.StatusMessage.error.value, resource, message)
539541

540-
def message_error(self, resource):
541-
self.status_update(pyclowder.utils.StatusMessage.error, resource, "Error processing message")
542+
def message_resubmit(self, resource, retry_count, message="Resubmitting message."):
543+
self.status_update(pyclowder.utils.StatusMessage.retry.value, resource, message)
542544

543-
def message_resubmit(self, resource, retry_count):
544-
self.status_update(pyclowder.utils.StatusMessage.processing, resource, "Resubmitting message (attempt #%s)"
545-
% retry_count)
545+
def message_process(self, resource, message):
546+
self.status_update(pyclowder.utils.StatusMessage.processing.value, resource, message)
546547

547548
def get(self, url, params=None, raise_status=True, **kwargs):
548549
"""
@@ -877,19 +878,22 @@ def process_messages(self, channel, rabbitmq_queue):
877878
with self.lock:
878879
msg = self.messages.pop(0)
879880

881+
# PROCESSING - Standard update message during extractor processing
880882
if msg["type"] == 'status':
881883
if self.header.reply_to:
882884
properties = pika.BasicProperties(delivery_mode=2, correlation_id=self.header.correlation_id)
883885
channel.basic_publish(exchange='',
884886
routing_key=self.header.reply_to,
885887
properties=properties,
886-
body=json.dumps(msg['status']))
888+
body=json.dumps(msg['payload']))
887889

890+
# DONE - Extractor finished without error
888891
elif msg["type"] == 'ok':
889892
channel.basic_ack(self.method.delivery_tag)
890893
with self.lock:
891894
self.finished = True
892895

896+
# ERROR - Extractor encountered error and message goes to error queue
893897
elif msg["type"] == 'error':
894898
properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
895899
channel.basic_publish(exchange='',
@@ -900,18 +904,18 @@ def process_messages(self, channel, rabbitmq_queue):
900904
with self.lock:
901905
self.finished = True
902906

907+
# RESUBMITTING - Extractor encountered error and message is resubmitted to same queue
903908
elif msg["type"] == 'resubmit':
904-
retry_count = msg['retry_count']
905-
queue = rabbitmq_queue
906-
properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
907909
jbody = json.loads(self.body)
908-
jbody['retry_count'] = retry_count
910+
jbody['retry_count'] = msg['retry_count']
909911
if 'exchange' not in jbody and self.method.exchange:
910912
jbody['exchange'] = self.method.exchange
911-
if 'routing_key' not in jbody and self.method.routing_key and self.method.routing_key != queue:
913+
if 'routing_key' not in jbody and self.method.routing_key and self.method.routing_key != rabbitmq_queue:
912914
jbody['routing_key'] = self.method.routing_key
915+
916+
properties = pika.BasicProperties(delivery_mode=2, reply_to=self.header.reply_to)
913917
channel.basic_publish(exchange='',
914-
routing_key=queue,
918+
routing_key=rabbitmq_queue,
915919
properties=properties,
916920
body=json.dumps(jbody))
917921
channel.basic_ack(self.method.delivery_tag)
@@ -923,31 +927,35 @@ def process_messages(self, channel, rabbitmq_queue):
923927

924928
def status_update(self, status, resource, message):
925929
super(RabbitMQHandler, self).status_update(status, resource, message)
926-
status_report = dict()
927-
# TODO: Update this to check resource["type"] once Clowder better supports dataset events
928-
status_report['file_id'] = resource["id"]
929-
status_report['job_id'] = self.job_id
930-
status_report['extractor_id'] = self.extractor_info['name']
931-
status_report['status'] = "%s: %s" % (status, message)
932-
status_report['start'] = pyclowder.utils.iso8601time()
930+
933931
with self.lock:
932+
# TODO: Remove 'status' from payload later and read from message_type and message in Clowder 2.0
934933
self.messages.append({"type": "status",
935-
"status": status_report,
936934
"resource": resource,
937-
"message": message})
938-
939-
def message_ok(self, resource):
940-
super(RabbitMQHandler, self).message_ok(resource)
935+
"payload": {
936+
"file_id": resource["id"],
937+
"extractor_id": self.extractor_info['name'],
938+
"job_id": self.job_id,
939+
"status": "%s: %s" % (status, message),
940+
"start": pyclowder.utils.iso8601time(),
941+
"message_type": status,
942+
"message": message
943+
}})
944+
945+
def message_ok(self, resource, message="Done processing."):
946+
super(RabbitMQHandler, self).message_ok(resource, message)
941947
with self.lock:
942948
self.messages.append({"type": "ok"})
943949

944-
def message_error(self, resource):
945-
super(RabbitMQHandler, self).message_error(resource)
950+
def message_error(self, resource, message="Error processing message."):
951+
super(RabbitMQHandler, self).message_error(resource, message)
946952
with self.lock:
947953
self.messages.append({"type": "error"})
948954

949-
def message_resubmit(self, resource, retry_count):
950-
super(RabbitMQHandler, self).message_resubmit(resource, retry_count)
955+
def message_resubmit(self, resource, retry_count, message=None):
956+
if message is None:
957+
message = "(#%s)" % retry_count
958+
super(RabbitMQHandler, self).message_resubmit(resource, retry_count, message)
951959
with self.lock:
952960
self.messages.append({"type": "resubmit", "retry_count": retry_count})
953961

@@ -1105,3 +1113,14 @@ def put(self, url, data=None, raise_status=True, **kwargs):
11051113
def delete(self, url, raise_status=True, **kwargs):
11061114
logging.getLogger(__name__).debug("DELETE: " + url)
11071115
return None
1116+
1117+
1118+
class PyClowderExtractionAbort(Exception):
    """Raise exception that will not be subject to retry attempts (i.e. errors that are expected to fail again).

    Extractors raise this to signal a failure that retrying cannot fix.

    Attributes:
        message -- explanation of the error
    """

    def __init__(self, message):
        # Forward the message to the base class so that str(exc), exc.args and
        # tracebacks carry the explanation; without this call the exception
        # renders as an empty string anywhere it is handled generically.
        super(PyClowderExtractionAbort, self).__init__(message)
        # Kept as an explicit attribute because callers read exc.message directly.
        self.message = message

pyclowder/datasets.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
from pyclowder.client import ClowderClient
1414
from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
15-
from pyclowder.utils import StatusMessage
1615

1716

1817
def create_empty(connector, host, key, datasetname, description, parentid=None, spaceid=None):
@@ -113,7 +112,7 @@ def download(connector, host, key, datasetid):
113112
datasetid -- the file that is currently being processed
114113
"""
115114

116-
connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid}, "Downloading dataset.")
115+
connector.message_process({"type": "dataset", "id": datasetid}, "Downloading dataset.")
117116

118117
# fetch dataset zipfile
119118
url = '%sapi/datasets/%s/download?key=%s' % (host, datasetid, key)
@@ -287,8 +286,7 @@ def upload_metadata(connector, host, key, datasetid, metadata):
287286
metadata -- the metadata to be uploaded
288287
"""
289288

290-
connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid},
291-
"Uploading dataset metadata.")
289+
connector.message_process({"type": "dataset", "id": datasetid}, "Uploading dataset metadata.")
292290

293291
headers = {'Content-Type': 'application/json'}
294292
url = '%sapi/datasets/%s/metadata.jsonld?key=%s' % (host, datasetid, key)

pyclowder/files.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from pyclowder.datasets import get_file_list
1616
from pyclowder.collections import get_datasets, get_child_collections
17-
from pyclowder.utils import StatusMessage
1817

1918
# Some sources of urllib3 support warning suppression, but not all
2019
try:
@@ -38,7 +37,7 @@ def download(connector, host, key, fileid, intermediatefileid=None, ext=""):
3837
ext -- the file extension, the downloaded file will end with this extension
3938
"""
4039

41-
connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Downloading file.")
40+
connector.message_process({"type": "file", "id": fileid}, "Downloading file.")
4241

4342
# TODO: intermediateid doesn't really seem to be used here, can we remove entirely?
4443
if not intermediatefileid:
@@ -180,7 +179,7 @@ def upload_metadata(connector, host, key, fileid, metadata):
180179
metadata -- the metadata to be uploaded
181180
"""
182181

183-
connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file metadata.")
182+
connector.message_process({"type": "file", "id": fileid}, "Uploading file metadata.")
184183

185184
headers = {'Content-Type': 'application/json'}
186185
url = '%sapi/files/%s/metadata.jsonld?key=%s' % (host, fileid, key)
@@ -204,7 +203,7 @@ def upload_preview(connector, host, key, fileid, previewfile, previewmetadata=No
204203
file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
205204
"""
206205

207-
connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file preview.")
206+
connector.message_process({"type": "file", "id": fileid}, "Uploading file preview.")
208207

209208
logger = logging.getLogger(__name__)
210209
headers = {'Content-Type': 'application/json'}
@@ -248,7 +247,7 @@ def upload_tags(connector, host, key, fileid, tags):
248247
tags -- the tags to be uploaded
249248
"""
250249

251-
connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file tags.")
250+
connector.message_process({"type": "file", "id": fileid}, "Uploading file tags.")
252251

253252
headers = {'Content-Type': 'application/json'}
254253
url = '%sapi/files/%s/tags?key=%s' % (host, fileid, key)

pyclowder/sections.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
import requests
1010

11-
from pyclowder.utils import StatusMessage
12-
1311

1412
def upload(connector, host, key, sectiondata):
1513
"""Upload section to Clowder.
@@ -47,7 +45,7 @@ def upload_tags(connector, host, key, sectionid, tags):
4745
tags -- the tags to be uploaded
4846
"""
4947

50-
connector.status_update(StatusMessage.processing, {"type": "section", "id": sectionid}, "Uploading section tags.")
48+
connector.message_process({"type": "section", "id": sectionid}, "Uploading section tags.")
5149

5250
headers = {'Content-Type': 'application/json'}
5351
url = '%sapi/sections/%s/tags?key=%s' % (host, sectionid, key)
@@ -67,8 +65,8 @@ def upload_description(connector, host, key, sectionid, description):
6765
description -- the description to be uploaded
6866
"""
6967

70-
connector.status_update(StatusMessage.processing, {"type": "section", "id": sectionid},
71-
"Uploading section description.")
68+
connector.message_process({"type": "section", "id": sectionid},
69+
"Uploading section description.")
7270

7371
headers = {'Content-Type': 'application/json'}
7472
url = '%sapi/sections/%s/description?key=%s' % (host, sectionid, key)

pyclowder/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ class StatusMessage(Enum):
4444
full string will be STATUS: MESSAGE.
4545
"""
4646

47-
start = "START"
47+
start = "STARTED"
4848
processing = "PROCESSING"
49-
done = "DONE"
49+
done = "SUCCEEDED"
50+
skip = "SKIPPED"
5051
error = "ERROR"
52+
retry = "RESUBMITTED"
5153

5254

5355
def iso8601time():

sample-extractors/wordcount/wordcount.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,30 @@ def process_message(self, connector, host, secret_key, resource, parameters):
3232
inputfile = resource["local_paths"][0]
3333
file_id = resource['id']
3434

35-
# call actual program
35+
# These process messages will appear in the Clowder UI under Extractions.
36+
connector.message_process(resource, "Loading contents of file...")
37+
38+
# Call actual program
3639
result = subprocess.check_output(['wc', inputfile], stderr=subprocess.STDOUT)
3740
result = result.decode('utf-8')
3841
(lines, words, characters, _) = result.split()
3942

40-
# store results as metadata
43+
connector.message_process(resource, "Found %s lines and %s words..." % (lines, words))
44+
45+
# Store results as metadata
4146
result = {
4247
'lines': lines,
4348
'words': words,
4449
'characters': characters
4550
}
4651
metadata = self.get_metadata(result, 'file', file_id, host)
52+
53+
# Normal logs will appear in the extractor log, but NOT in the Clowder UI.
4754
logger.debug(metadata)
4855

49-
# upload metadata
56+
# Upload metadata to original file
5057
pyclowder.files.upload_metadata(connector, host, secret_key, file_id, metadata)
5158

52-
5359
if __name__ == "__main__":
5460
extractor = WordCount()
5561
extractor.start()

0 commit comments

Comments
 (0)