Skip to content

Commit 5215a89

Browse files
FIX: More generic way of using different (de)-serializers
Signed-off-by: Sebastian Waldbauer <[email protected]>
1 parent 5697faf commit 5215a89

File tree

9 files changed

+105
-92
lines changed

9 files changed

+105
-92
lines changed

intelmq/lib/bot.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import inspect
1818
import io
1919
import json
20-
import msgpack
2120
import logging
2221
import os
2322
import re
@@ -99,6 +98,7 @@ class Bot(object):
9998
statistics_host: str = "127.0.0.1"
10099
statistics_password: Optional[str] = None
101100
statistics_port: int = 6379
101+
pipeline_use_packer: str = os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')
102102

103103
_message_processed_verb: str = 'Processed'
104104

@@ -808,7 +808,7 @@ def __init_logger(self):
808808

809809
def __log_configuration_parameter(self, config_name: str, option: str, value: Any):
810810
if "password" in option or "token" in option:
811-
value = "HIDDEN"
811+
value = "<redacted>"
812812

813813
message = "{} configuration: parameter {!r} loaded with value {!r}." \
814814
.format(config_name.title(), option, value)
@@ -1369,9 +1369,8 @@ def export_event(self, event: libmessage.Event,
13691369
if 'raw' in event:
13701370
del event['raw']
13711371
if return_type is str:
1372-
return event.to_json(hierarchical=self.hierarchical,
1373-
with_type=self.with_type,
1374-
jsondict_as_string=self.jsondict_as_string)
1372+
return event.to_pack("JSON", hierarchical=self.hierarchical,
1373+
with_type=self.with_type)
13751374
else:
13761375
retval = event.to_dict(hierarchical=self.hierarchical,
13771376
with_type=self.with_type,

intelmq/lib/exceptions.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,4 +180,13 @@ class UnserializationError(IntelMQException, ValueError):
180180
"""
181181
def __init__(self, exception: Exception = None, object: bytes = None):
182182
self.object = object
183-
super().__init__("Could not unserialize message%s." % exception)
183+
super().__init__("Could not unserialize message, %s." % exception)
184+
185+
186+
class SerializationError(IntelMQException, ValueError):
    """
    Unrecoverable error during message serialization.

    Parameters:
        exception: the underlying error; its string form is appended to the
            error message.
        object: the data that could not be serialized. It is stored unchanged
            on ``self.object`` so callers can inspect or dump it.
    """
    # NOTE(review): `object` is annotated as bytes, but Message.serialize and
    # Event.to_pack pass the message instance itself - confirm the intended type.
    def __init__(self, exception: Exception = None, object: bytes = None):
        self.object = object
        super().__init__("Could not serialize message, %s." % exception)

intelmq/lib/message.py

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,20 @@
1414
import warnings
1515
from collections import defaultdict
1616
from typing import Any, Dict, Iterable, Optional, Sequence, Union
17-
import msgpack
1817

1918
import intelmq.lib.exceptions as exceptions
2019
import intelmq.lib.harmonization
2120
from intelmq import HARMONIZATION_CONF_FILE
2221
from intelmq.lib import utils
2322

24-
__all__ = ['Event', 'Message', 'MessageFactory', 'Report']
23+
__all__ = ['Event', 'Message', 'MessageFactory', 'Report', 'Packer', 'PackerMsgPack', 'PackerJSON']
2524
VALID_MESSSAGE_TYPES = ('Event', 'Message', 'Report')
2625

26+
try:
    import msgpack
except ImportError:
    # msgpack is an optional dependency. PackerMsgPack checks for None and
    # raises MissingDependencyError when it is actually requested.
    msgpack = None
30+
2731

2832
class MessageFactory(object):
2933
"""
@@ -60,7 +64,7 @@ def from_dict(message: dict, harmonization=None,
6064

6165
@staticmethod
6266
def unserialize(raw_message: bytes, harmonization: dict = None,
63-
default_type: Optional[str] = None, use_packer: str = "msgpack") -> dict:
67+
default_type: Optional[str] = None, use_packer: str = "MsgPack") -> dict:
6468
"""
6569
Takes JSON-encoded Message object, returns instance of correct class.
6670
@@ -78,14 +82,13 @@ def unserialize(raw_message: bytes, harmonization: dict = None,
7882
default_type=default_type)
7983

8084
@staticmethod
81-
def serialize(message) -> bytes:
85+
def serialize(message, use_packer: str = 'MsgPack') -> bytes:
8286
"""
8387
Takes instance of message-derived class and makes JSON-encoded Message.
8488
8589
The class is saved in __type attribute.
8690
"""
87-
raw_message = Message.serialize(message)
88-
return raw_message
91+
return Message.serialize(message, use_packer=use_packer)
8992

9093

9194
class Message(dict):
@@ -309,30 +312,29 @@ def deep_copy(self):
309312
harmonization={self.__class__.__name__.lower(): self.harmonization_config})
310313

311314
def __str__(self):
312-
return self.serialize(use_packer="json")
315+
return self.serialize(use_packer="JSON")
313316

314-
def serialize(self, use_packer: str = "msgpack"):
317+
def serialize(self, use_packer: str = "MsgPack"):
315318
delete_type = False
316319
if '__type' not in self:
317320
delete_type = True
318321
self['__type'] = self.__class__.__name__
319322

320-
if use_packer == "json":
321-
packed = json.dumps(self)
322-
else:
323-
packed = msgpack.packb(self)
323+
try:
324+
packer: Packer = getattr(intelmq.lib.message, f"Packer{use_packer}")()
325+
packed = packer.serialize(data=self)
326+
except Exception as exc:
327+
raise exceptions.SerializationError(exception=exc, object=self)
324328

325329
if delete_type:
326330
del self['__type']
327331
return packed
328332

329333
@staticmethod
330-
def unserialize(message: bytes, use_packer: str = "msgpack"):
334+
def unserialize(message: bytes, use_packer: str = "MsgPack"):
331335
try:
332-
if use_packer == "json":
333-
return json.loads(message)
334-
else:
335-
return msgpack.unpackb(message, raw=False)
336+
packer: Packer = getattr(intelmq.lib.message, f"Packer{use_packer}")()
337+
return packer.unserialize(data=message)
336338
except Exception as exc:
337339
raise exceptions.UnserializationError(exception=exc, object=message)
338340

@@ -485,13 +487,13 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False,
485487

486488
return new_dict
487489

488-
def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False):
489-
json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type)
490-
return json.dumps(json_dict, ensure_ascii=False, sort_keys=True)
491-
492-
def to_msgpack(self, hierarchical=False, with_type=False):
493-
msgpack_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type)
494-
return msgpack.packb(msgpack_dict)
490+
def to_pack(self, use_packer="MsgPack", hierarchical=False, with_type=False, **kwargs):
    """
    Convert the message to a dict and serialize it with the selected packer.

    Parameters:
        use_packer: suffix of the packer class name; the class
            ``Packer<use_packer>`` is looked up in ``intelmq.lib.message``.
        hierarchical: forwarded to ``to_dict``.
        with_type: forwarded to ``to_dict``.
        **kwargs: forwarded unchanged to the packer's ``serialize`` method.

    Returns:
        Whatever the selected packer's ``serialize`` returns.

    Raises:
        SerializationError: for any failure while resolving the packer,
            building the dict or serializing it. The original exception is
            attached as the cause.
    """
    try:
        packer: Packer = getattr(intelmq.lib.message, f"Packer{use_packer}")()
        data = self.to_dict(hierarchical=hierarchical, with_type=with_type)
        return packer.serialize(data, **kwargs)
    except Exception as exc:
        # Explicit chaining keeps the root cause visible in tracebacks.
        raise exceptions.SerializationError(exception=exc, object=self) from exc
495497

496498
def __eq__(self, other: dict) -> bool:
497499
"""
@@ -590,3 +592,38 @@ def copy(self):
590592
if 'time.observation' in retval and 'time.observation' not in self:
591593
del retval['time.observation']
592594
return retval
595+
596+
597+
class Packer():
    """
    Base class for message (de-)serializers.

    Concrete packers are resolved by name as ``Packer<name>`` (for example
    ``PackerJSON``) and must override both methods below.
    """
    def __init__(self) -> None:
        pass

    def serialize(self, data, **kwargs):
        """
        Serialize ``data`` (a dict-like message) to the packer's wire format.

        Extra keyword arguments are meant for the underlying library.
        """
        raise NotImplementedError()

    def unserialize(self, data: bytes, **kwargs):
        """
        Turn serialized ``data`` back into a Python object.

        Extra keyword arguments are meant for the underlying library.
        """
        raise NotImplementedError()
606+
607+
608+
class PackerMsgPack(Packer):
    """
    Packer backed by the optional ``msgpack`` library.

    Raises:
        MissingDependencyError: on construction, if ``msgpack`` could not be
            imported (the module-level name is ``None``).
    """
    def __init__(self) -> None:
        if msgpack is None:
            raise exceptions.MissingDependencyError("msgpack")
        super().__init__()

    def serialize(self, data, **kwargs):
        """Pack ``data`` with ``msgpack.packb``; kwargs are passed through."""
        return msgpack.packb(data, **kwargs)

    def unserialize(self, data, **kwargs):
        """
        Unpack ``data`` with ``msgpack.unpackb``.

        ``raw`` defaults to False but may be overridden by the caller.
        """
        # setdefault avoids a "multiple values for keyword argument 'raw'"
        # TypeError when the caller passes raw explicitly.
        kwargs.setdefault("raw", False)
        return msgpack.unpackb(data, **kwargs)
619+
620+
621+
class PackerJSON(Packer):
    """
    Packer backed by the standard library ``json`` module.

    No constructor override is needed; the base class constructor is used.
    """

    def serialize(self, data, **kwargs):
        """Return ``data`` encoded by ``json.dumps``; kwargs are passed through."""
        return json.dumps(data, **kwargs)

    def unserialize(self, data, **kwargs):
        """Return the object decoded by ``json.loads``; kwargs are passed through."""
        return json.loads(data, **kwargs)

intelmq/lib/test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def setUpClass(cls):
153153
elif cls.bot_type != 'collector' and cls.default_input_message == '':
154154
cls.default_input_message = {'__type': 'Event'}
155155
if type(cls.default_input_message) is dict:
156-
cls.default_input_message = msgpack.packb(cls.default_input_message)
156+
cls.default_input_message = message.MessageFactory.serialize(cls.default_input_message, os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
157157

158158
if cls.use_cache and not os.environ.get('INTELMQ_SKIP_REDIS'):
159159
password = os.environ.get('INTELMQ_TEST_REDIS_PASSWORD') or \
@@ -522,7 +522,7 @@ def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_d
522522
event = self.get_output_queue(path=path)[queue_pos]
523523
self.assertIsInstance(event, bytes)
524524

525-
event_dict = msgpack.unpackb(event, raw=False)
525+
event_dict = message.MessageFactory.unserialize(event, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
526526
if isinstance(expected_msg, (message.Event, message.Report)):
527527
expected = expected_msg.to_dict(with_type=True)
528528
else:

intelmq/tests/bots/experts/cymru_whois/test_expert.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

55
# -*- coding: utf-8 -*-
6-
import msgpack
6+
import os
77
import unittest
88

99
import intelmq.lib.test as test
1010
from intelmq.bots.experts.cymru_whois.expert import CymruExpertBot
11+
from work.intelmq.intelmq.lib import message
1112

1213
EXAMPLE_INPUT = {"__type": "Event",
1314
"source.ip": "78.104.144.2", # example.com
@@ -93,7 +94,7 @@ def test_6to4_result(self):
9394
"""
9495
self.input_message = EXAMPLE_6TO4_INPUT
9596
self.run_bot()
96-
actual = msgpack.loads(self.get_output_queue()[0])
97+
actual = message.MessageFactory.serialize(self.get_output_queue()[0], use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
9798
self.assertDictContainsSubset(EXAMPLE_6TO4_INPUT, actual)
9899
self.assertIn("source.asn", actual)
99100
self.assertIn("source.as_name", actual)

intelmq/tests/bots/experts/idea/test_expert.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# -*- coding: utf-8 -*-
66
import unittest
77
import json
8-
import msgpack
98

109
import intelmq.lib.test as test
1110
from intelmq.lib.message import MessageFactory
@@ -89,7 +88,7 @@ def test_conversion(self):
8988
# the data from the "output" field and compare after removing ID's
9089
event = self.get_output_queue()[0]
9190
self.assertIsInstance(event, bytes)
92-
event_dict = MessageFactory.unserialize(event)
91+
event_dict = MessageFactory.unserialize(event, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
9392
self.assertIsInstance(event_dict, dict)
9493
self.assertTrue(b"output" in event_dict)
9594
idea_event = json.loads(event_dict["output"])

intelmq/tests/bots/parsers/json/test_parser.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
import base64
77
import os
88
import unittest
9-
import json
10-
import msgpack
119

1210
import intelmq.lib.test as test
1311
from intelmq.bots.parsers.json.parser import JSONParserBot

intelmq/tests/lib/test_message.py

Lines changed: 20 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
Most tests are performed on Report, as it is formally the same as Message,
1111
but has a valid Harmonization configuration.
1212
"""
13-
import json
14-
import msgpack
13+
from cmath import exp
14+
import os
1515
import unittest
1616

1717
import pkg_resources
@@ -159,12 +159,14 @@ def test_event_ne_different_config(self):
159159
def test_invalid_type(self):
160160
""" Test if Message raises InvalidArgument for invalid type. """
161161
with self.assertRaises(exceptions.InvalidArgument):
162-
message.MessageFactory.unserialize(msgpack.dumps({"__type": "Message"}), harmonization=HARM)
162+
data = message.MessageFactory.serialize({"__type": "Message"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
163+
message.MessageFactory.unserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
163164

164165
def test_invalid_type2(self):
165166
""" Test if MessageFactory raises InvalidArgument for invalid type. """
166167
with self.assertRaises(exceptions.InvalidArgument):
167-
message.MessageFactory.unserialize(msgpack.dumps({"__type": "Invalid"}), harmonization=HARM)
168+
data = message.MessageFactory.serialize({"__type": "Invalid"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
169+
message.MessageFactory.unserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
168170

169171
def test_report_invalid_key(self):
170172
""" Test if report raises InvalidKey for invalid key in add(). """
@@ -365,9 +367,17 @@ def test_factory_serialize(self):
365367
report.add('feed.name', 'Example')
366368
report.add('feed.url', URL_SANE)
367369
report.add('raw', LOREM_BASE64, sanitize=False)
368-
actual = message.MessageFactory.serialize(report)
369-
expected = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xa6__type\xa6Report')
370-
self.assertDictEqual(msgpack.unpackb(expected), msgpack.unpackb(actual))
370+
actual = message.MessageFactory.serialize(report, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
371+
expected = message.MessageFactory.serialize({
372+
'feed.name': 'Example',
373+
'feed.url': 'https://example.com/',
374+
'raw': 'bG9yZW0gaXBzdW0=',
375+
'__type': 'Report',
376+
}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
377+
self.assertDictEqual(
378+
message.MessageFactory.unserialize(expected, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')),
379+
message.MessageFactory.unserialize(actual, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
380+
)
371381

372382
def test_deep_copy_content(self):
373383
""" Test if deep_copy does return the same items. """
@@ -495,54 +505,11 @@ def test_event_dict_hierarchical(self):
495505
'00:00'}},
496506
event.to_dict(hierarchical=True))
497507

498-
def test_event_json(self):
499-
""" Test Event to_json. """
500-
event = self.new_event()
501-
event = self.add_event_examples(event)
502-
actual = event.to_json()
503-
self.assertIsInstance(actual, str)
504-
expected = ('{"feed.url": "https://example.com/", "feed.name": '
505-
'"Example", "raw": "bG9yZW0gaXBzdW0=", "time.observation": '
506-
'"2015-01-01T13:37:00+00:00"}')
507-
self.assertDictEqual(json.loads(expected), json.loads(actual))
508-
509-
def test_event_json_hierarchical(self):
510-
""" Test Event to_json. """
511-
event = self.new_event()
512-
event = self.add_event_examples(event)
513-
actual = event.to_json(hierarchical=True)
514-
self.assertIsInstance(actual, str)
515-
expected = ('{"feed": {"url": "https://example.com/", "name": '
516-
'"Example"}, "raw": "bG9yZW0gaXBzdW0=", "time": '
517-
'{"observation": "2015-01-01T13:37:00+00:00"}}')
518-
self.assertDictEqual(json.loads(expected), json.loads(actual))
519-
520-
def test_event_msgpack(self):
521-
""" Test event to_msgpack """
522-
event = self.new_event()
523-
event = self.add_event_examples(event)
524-
actual = event.to_msgpack()
525-
self.assertIsInstance(actual, bytes)
526-
excepted = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xb0time.observation\xb92015-01-01T13:37:00+00:00')
527-
self.assertDictEqual(msgpack.unpackb(excepted), msgpack.unpackb(actual))
528-
529508
def test_event_serialize(self):
530509
""" Test Event serialize. """
531510
event = self.new_event()
532-
self.assertEqual(b'\x81\xa6__type\xa5Event',
533-
event.serialize())
534-
535-
def test_event_string(self):
536-
""" Test Event serialize. """
537-
event = self.new_event()
538-
self.assertEqual(b'\x81\xa6__type\xa5Event',
539-
event.serialize())
540-
541-
def test_event_unicode(self):
542-
""" Test Event serialize. """
543-
event = self.new_event()
544-
self.assertEqual(b'\x81\xa6__type\xa5Event',
545-
event.serialize())
511+
expected = message.MessageFactory.serialize({'__type': 'Event'}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
512+
self.assertEqual(expected, event.serialize())
546513

547514
def test_event_from_report(self):
548515
""" Data from report should be in event, except for extra. """
@@ -607,7 +574,7 @@ def test_event_init_check_tuple(self):
607574

608575
def test_event_init(self):
609576
""" Test if initialization method checks fields. """
610-
event = msgpack.dumps({"__type": "Event", "source.asn": "foo"})
577+
event = message.MessageFactory.serialize({"__type": "Event", "source.asn": "foo"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack'))
611578
with self.assertRaises(exceptions.InvalidValue):
612579
message.MessageFactory.unserialize(event, harmonization=HARM)
613580

intelmq/tests/lib/test_parser_bot.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,10 @@ def set_bot(cls):
130130
cls.bot_reference = DummyParserBot
131131
cls.default_input_message = EXAMPLE_REPORT
132132
cls.allowed_error_count = 1
133-
cls.sysconfig = {'error_dump_message': True}
133+
cls.sysconfig = {
134+
'error_dump_message': True,
135+
'INTELMQ_USE_PACKER': 'JSON',
136+
}
134137

135138
def dump_message(self, error_traceback, message=None):
136139
self.assertDictEqual(EXPECTED_DUMP, message)

0 commit comments

Comments
 (0)