Skip to content

Commit bd08e80

Browse files
authored
Merge pull request #100 from danni-m/master
Module's data type support
2 parents fe322ed + 6e4dba6 commit bd08e80

File tree

5 files changed

+174
-29
lines changed

5 files changed

+174
-29
lines changed

rdbtools/iowrapper.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
2+
class IOWrapper(object):
    """Pass-through reader that can record what is read from a stream.

    Two independent recordings can be switched on and off:

    * the raw bytes returned by ``read`` (``start_recording`` /
      ``stop_recording``), and
    * the number of bytes returned by ``read`` (``start_recording_size`` /
      ``stop_recording_size``).

    Stopping a recording also discards whatever it had accumulated, so
    callers must fetch the recorded value *before* stopping.
    """

    def __init__(self, io_object):
        """
        :param io_object: any object exposing ``read(n_bytes)`` that returns
            a bytes-like value
        """
        self.io_object = io_object
        self.record_buffer = False
        self.record_buffer_size = False
        self.bytes = bytes()
        self.buffer_size = 0

    def start_recording(self):
        """Begin accumulating the bytes returned by subsequent reads."""
        self.record_buffer = True

    def start_recording_size(self):
        """Begin counting the bytes returned by subsequent reads."""
        self.record_buffer_size = True

    def get_recorded_buffer(self):
        """Return the bytes accumulated since recording was started."""
        return self.bytes

    def get_recorded_size(self):
        """Return the number of bytes counted since size recording started."""
        return self.buffer_size

    def stop_recording(self):
        """Stop accumulating bytes and discard the recorded buffer."""
        self.record_buffer = False
        self.bytes = bytes()

    def stop_recording_size(self):
        """Stop counting bytes and reset the counter to zero."""
        # The flag must be cleared here; leaving it set would keep counting
        # every later read even though recording was "stopped".
        self.record_buffer_size = False
        self.buffer_size = 0

    def read(self, n_bytes):
        """Read up to ``n_bytes`` from the wrapped object.

        The data is returned unchanged; it is additionally appended to the
        recorded buffer and/or added to the recorded size when the
        corresponding recording is active.
        """
        current_bytes = self.io_object.read(n_bytes)

        if self.record_buffer:
            self.bytes += current_bytes

        if self.record_buffer_size:
            self.buffer_size += len(current_bytes)

        return current_bytes

rdbtools/memprofiler.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def __init__(self, stream, architecture, redis_version='3.2', string_escape=None
145145
self._pointer_size = 4
146146
self._long_size = 4
147147
self._architecture = 32
148-
148+
149149
def emit_record(self, record_type, key, byte_count, encoding, size, largest_el):
150150
if key is not None:
151151
key = bytes_to_unicode(key, self._escape, skip_printable=True)
@@ -298,7 +298,19 @@ def end_list(self, key, info):
298298
self.emit_record("list", key, self._current_size, self._current_encoding, self._current_length,
299299
self._len_largest_element)
300300
self.end_key()
301-
301+
302+
def start_module(self, key, module_id, expiry):
    """Begin the memory estimate for a module key.

    The module id is stored as the record's encoding, and the running size
    starts at the top-level object overhead for ``key``/``expiry`` plus the
    fixed module framing.

    :return: ``False`` -- the full key buffer does not need to be built
    """
    # 8 bytes for the module id length plus 1 byte for the EOF marker
    module_framing = 8 + 1
    self._current_encoding = module_id
    self._current_size = self.top_level_object_overhead(key, expiry) + module_framing
    return False
308+
309+
def end_module(self, key, buffer_size, buffer=None):
    """Emit the memory record for a module key and close the key.

    The reported byte count is the size accumulated in ``start_module``
    plus ``buffer_size``. The same total is reported as the largest
    element, and the record's size (element count) is 1.
    ``buffer`` is accepted but not used here.
    """
    total_bytes = self._current_size + buffer_size
    self.emit_record(
        "module", key, total_bytes, self._current_encoding, 1, total_bytes)
    self.end_key()
313+
302314
def start_sorted_set(self, key, length, expiry, info):
303315
self._current_length = length
304316
self._current_encoding = info['encoding']
@@ -479,6 +491,7 @@ def zset_random_level(self):
479491

480492
MAXINT = 2**63 - 1
481493

494+
482495
def element_length(element):
483496
if isinstance(element, int):
484497
if element < - MAXINT - 1 or element > MAXINT:

rdbtools/parser.py

Lines changed: 103 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import struct
22
import io
3-
import sys
43
import datetime
54
import re
65

76
from rdbtools.encodehelpers import STRING_ESCAPE_RAW, apply_escape_bytes, bval
87
from .compat import range, str2regexp
8+
from .iowrapper import IOWrapper
99

1010
try:
1111
try:
@@ -41,6 +41,7 @@
4141
REDIS_RDB_TYPE_HASH = 4
4242
REDIS_RDB_TYPE_ZSET_2 = 5 # ZSET version 2 with doubles stored in binary.
4343
REDIS_RDB_TYPE_MODULE = 6
44+
REDIS_RDB_TYPE_MODULE_2 = 7
4445
REDIS_RDB_TYPE_HASH_ZIPMAP = 9
4546
REDIS_RDB_TYPE_LIST_ZIPLIST = 10
4647
REDIS_RDB_TYPE_SET_INTSET = 11
@@ -53,8 +54,15 @@
5354
REDIS_RDB_ENC_INT32 = 2
5455
REDIS_RDB_ENC_LZF = 3
5556

57+
REDIS_RDB_MODULE_OPCODE_EOF = 0 # End of module value.
58+
REDIS_RDB_MODULE_OPCODE_SINT = 1
59+
REDIS_RDB_MODULE_OPCODE_UINT = 2
60+
REDIS_RDB_MODULE_OPCODE_FLOAT = 3
61+
REDIS_RDB_MODULE_OPCODE_DOUBLE = 4
62+
REDIS_RDB_MODULE_OPCODE_STRING = 5
63+
5664
DATA_TYPE_MAPPING = {
57-
0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash", 5 : "sortedset", 6 : "module",
65+
0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash", 5 : "sortedset", 6 : "module", 7: "module",
5866
9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash", 14 : "list"}
5967

6068
class RdbCallback(object):
@@ -106,7 +114,23 @@ def start_database(self, db_number):
106114
107115
Typically, callbacks store the current database number in a class variable
108116
109-
"""
117+
"""
118+
pass
119+
120+
def start_module(self, key, module_name, expiry):
    """
    Called when the parser reaches a key whose value is a module data type.

    :param key: the redis key
    :param module_name: the module's name as a string
    :param expiry: the key's expiry, passed through as read by the parser
    :return: boolean telling the parser whether to record the value's
        full raw buffer; this default implementation returns False
    """
    return False
129+
130+
def handle_module_data(self, key, opcode, data):
    """
    Called for an item read from a module value.

    :param key: the redis key
    :param opcode: the module opcode that introduced the item
    :param data: the decoded item

    The default implementation does nothing.
    """
    pass
132+
133+
def end_module(self, key, buffer_size, buffer=None):
    """
    Called when a module value has been read completely.

    :param key: the redis key
    :param buffer_size: size reported by the parser for the module value
    :param buffer: recorded raw value, or None when nothing was recorded

    The default implementation does nothing.
    """
    pass
111135

112136
def db_size(self, db_size, expires_size):
@@ -366,14 +390,14 @@ def parse_fd(self, fd):
366390
self._callback.db_size(db_size, expire_size)
367391
continue
368392

369-
if data_type == REDIS_RDB_OPCODE_EOF :
393+
if data_type == REDIS_RDB_OPCODE_EOF:
370394
self._callback.end_database(db_number)
371395
self._callback.end_rdb()
372396
if self._rdb_version >= 5:
373397
f.read(8)
374398
break
375399

376-
if self.matches_filter(db_number) :
400+
if self.matches_filter(db_number):
377401
self._key = self.read_string(f)
378402
if self.matches_filter(db_number, self._key, data_type):
379403
self.read_object(f, data_type)
@@ -382,20 +406,20 @@ def parse_fd(self, fd):
382406
else :
383407
self.skip_key_and_object(f, data_type)
384408

385-
def read_length_with_encoding(self, f) :
409+
def read_length_with_encoding(self, f):
386410
length = 0
387411
is_encoded = False
388412
bytes = []
389413
bytes.append(read_unsigned_char(f))
390414
enc_type = (bytes[0] & 0xC0) >> 6
391-
if enc_type == REDIS_RDB_ENCVAL :
415+
if enc_type == REDIS_RDB_ENCVAL:
392416
is_encoded = True
393417
length = bytes[0] & 0x3F
394-
elif enc_type == REDIS_RDB_6BITLEN :
418+
elif enc_type == REDIS_RDB_6BITLEN:
395419
length = bytes[0] & 0x3F
396-
elif enc_type == REDIS_RDB_14BITLEN :
420+
elif enc_type == REDIS_RDB_14BITLEN:
397421
bytes.append(read_unsigned_char(f))
398-
length = ((bytes[0]&0x3F)<<8)|bytes[1]
422+
length = ((bytes[0] & 0x3F) << 8) | bytes[1]
399423
elif bytes[0] == REDIS_RDB_32BITLEN:
400424
length = read_unsigned_int_be(f)
401425
elif bytes[0] == REDIS_RDB_64BITLEN:
@@ -460,47 +484,49 @@ def read_object(self, f, enc_type) :
460484
val = self.read_string(f)
461485
self._callback.rpush(self._key, val)
462486
self._callback.end_list(self._key, info={'encoding':'linkedlist' })
463-
elif enc_type == REDIS_RDB_TYPE_SET :
487+
elif enc_type == REDIS_RDB_TYPE_SET:
464488
# A redis list is just a sequence of strings
465489
# We successively read strings from the stream and create a set from it
466490
# Note that the order of strings is non-deterministic
467491
length = self.read_length(f)
468492
self._callback.start_set(self._key, length, self._expiry, info={'encoding':'hashtable'})
469-
for count in range(0, length) :
493+
for count in range(0, length):
470494
val = self.read_string(f)
471495
self._callback.sadd(self._key, val)
472496
self._callback.end_set(self._key)
473497
elif enc_type == REDIS_RDB_TYPE_ZSET or enc_type == REDIS_RDB_TYPE_ZSET_2 :
474498
length = self.read_length(f)
475499
self._callback.start_sorted_set(self._key, length, self._expiry, info={'encoding':'skiplist'})
476-
for count in range(0, length) :
500+
for count in range(0, length):
477501
val = self.read_string(f)
478502
score = read_double(f) if enc_type == REDIS_RDB_TYPE_ZSET_2 else self.read_float(f)
479503
self._callback.zadd(self._key, score, val)
480504
self._callback.end_sorted_set(self._key)
481-
elif enc_type == REDIS_RDB_TYPE_HASH :
505+
elif enc_type == REDIS_RDB_TYPE_HASH:
482506
length = self.read_length(f)
483507
self._callback.start_hash(self._key, length, self._expiry, info={'encoding':'hashtable'})
484-
for count in range(0, length) :
508+
for count in range(0, length):
485509
field = self.read_string(f)
486510
value = self.read_string(f)
487511
self._callback.hset(self._key, field, value)
488512
self._callback.end_hash(self._key)
489-
elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP :
513+
elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP:
490514
self.read_zipmap(f)
491-
elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST :
515+
elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST:
492516
self.read_ziplist(f)
493-
elif enc_type == REDIS_RDB_TYPE_SET_INTSET :
517+
elif enc_type == REDIS_RDB_TYPE_SET_INTSET:
494518
self.read_intset(f)
495-
elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST :
519+
elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST:
496520
self.read_zset_from_ziplist(f)
497-
elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST :
521+
elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST:
498522
self.read_hash_from_ziplist(f)
499523
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
500524
self.read_list_from_quicklist(f)
501-
elif enc_type == REDIS_RDB_TYPE_MODULE :
502-
raise Exception('read_object', 'Unable to read Redis Modules RDB objects (key %s)' % (enc_type, self._key))
503-
else :
525+
elif enc_type == REDIS_RDB_TYPE_MODULE:
526+
raise Exception('read_object', 'Unable to read Redis Modules RDB objects (key %s)' % self._key)
527+
elif enc_type == REDIS_RDB_TYPE_MODULE_2:
528+
self.read_module(f)
529+
else:
504530
raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
505531

506532
def skip_key_and_object(self, f, data_type):
@@ -564,8 +590,10 @@ def skip_object(self, f, enc_type):
564590
elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
565591
skip_strings = self.read_length(f)
566592
elif enc_type == REDIS_RDB_TYPE_MODULE:
567-
raise Exception('skip_object', 'Unable to skip Redis Modules RDB objects (key %s)' % (enc_type, self._key))
568-
else :
593+
raise Exception('skip_object', 'Unable to skip Redis Modules RDB objects (key %s)' % self._key)
594+
elif enc_type == REDIS_RDB_TYPE_MODULE_2:
595+
self.read_module(f)
596+
else:
569597
raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
570598
for x in range(0, skip_strings):
571599
self.skip_string(f)
@@ -728,6 +756,56 @@ def read_zipmap_next_length(self, f) :
728756
else:
729757
return None
730758

759+
def read_module(self, f):
    """Read one module (type 2) value from ``f`` and report it to the callback.

    The callback sequence is ``start_module``, then ``handle_module_data``
    for every opcode/value pair, then ``end_module``. All reads go through
    an ``IOWrapper`` so the number of bytes consumed is always counted.
    The raw bytes are kept only when ``start_module`` returns a true value.

    :param f: stream positioned right after the key of a module value
    :raises Exception: when an unknown module opcode is encountered
    """
    # this method is based on the actual implementation in redis (src/rdb.c:rdbLoadObject)
    iowrapper = IOWrapper(f)
    iowrapper.start_recording_size()
    iowrapper.start_recording()
    # the value starts with the 64 bit module id, stored as a length
    module_id, _ = self.read_length_with_encoding(iowrapper)
    record_buffer = self._callback.start_module(self._key, self._decode_module_id(module_id), self._expiry)

    if not record_buffer:
        # the callback does not want the raw bytes; keep only the size
        iowrapper.stop_recording()

    opcode = self.read_length(iowrapper)
    while opcode != REDIS_RDB_MODULE_OPCODE_EOF:
        if opcode in (REDIS_RDB_MODULE_OPCODE_SINT, REDIS_RDB_MODULE_OPCODE_UINT):
            data = self.read_length(iowrapper)
        elif opcode == REDIS_RDB_MODULE_OPCODE_FLOAT:
            # Redis writes module floats as 4 byte little-endian binary
            # values, not in the length-prefixed text form that
            # self.read_float() decodes.
            data = struct.unpack('<f', iowrapper.read(4))[0]
        elif opcode == REDIS_RDB_MODULE_OPCODE_DOUBLE:
            data = read_double(iowrapper)
        elif opcode == REDIS_RDB_MODULE_OPCODE_STRING:
            data = self.read_string(iowrapper)
        else:
            raise Exception("Unknown module opcode %s" % opcode)
        self._callback.handle_module_data(self._key, opcode, data)
        # read the next item in the module data type
        opcode = self.read_length(iowrapper)

    buffer = None
    if record_buffer:
        # prepend the buffer with REDIS_RDB_TYPE_MODULE_2 type
        buffer = struct.pack('B', REDIS_RDB_TYPE_MODULE_2) + iowrapper.get_recorded_buffer()
        # stopping discards the recorded bytes, so it must come after the copy above
        iowrapper.stop_recording()
    self._callback.end_module(self._key, buffer_size=iowrapper.get_recorded_size(), buffer=buffer)
792+
793+
# alphabet used to encode module names, 6 bits per character
charset = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

def _decode_module_id(self, module_id):
    """
    Turn a numeric module id into its 9 character module name.

    Follows @antirez's moduleTypeNameByID function in redis/src/module.c.
    :param module_id: 64bit integer
    :return: string
    """
    # the low 10 bits are not part of the name
    remaining = module_id >> 10
    letters = []
    # peel off 6 bits per character, last character first
    for _ in range(9):
        letters.append(self.charset[remaining & 63])
        remaining >>= 6
    letters.reverse()
    return ''.join(letters)
808+
731809
def verify_magic_string(self, magic_string) :
732810
if magic_string != b'REDIS' :
733811
raise Exception('verify_magic_string', 'Invalid File Format')
288 Bytes
Binary file not shown.

tests/memprofiler_tests.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,25 @@
44
from rdbtools import MemoryCallback
55
import os
66

7+
from rdbtools.memprofiler import MemoryRecord
8+
9+
710
class Stats(object):
    """Record sink for the tests: keeps every record it is given, by key."""

    def __init__(self):
        # maps record.key -> record
        self.records = dict()

    def next_record(self, record):
        """Store ``record`` under ``record.key``, replacing any earlier one."""
        self.records[record.key] = record
1316

17+
1418
def get_stats(file_name):
    """Parse a dump from the ``dumps`` directory next to this file.

    :param file_name: name of the rdb file inside ``dumps``
    :return: the ``records`` dict of the ``Stats`` object handed to
        ``MemoryCallback``
    """
    collector = Stats()
    rdb_parser = RdbParser(MemoryCallback(collector, 64))
    dumps_dir = os.path.join(os.path.dirname(__file__), 'dumps')
    rdb_parser.parse(os.path.join(dumps_dir, file_name))
    return collector.records
20-
24+
25+
2126
class MemoryCallbackTestCase(unittest.TestCase):
2227
def setUp(self):
    """No per-test fixture is needed."""
    return None
@@ -26,3 +31,13 @@ def test_len_largest_element(self):
2631
stats = get_stats('ziplist_that_compresses_easily.rdb')
2732

2833
self.assertEqual(stats['ziplist_compresses_easily'].len_largest_element, 36, "Length of largest element does not match")
34+
35+
def test_rdb_with_module(self):
    """A dump containing a module key yields a record for every key.

    Checks that both the plain key and the module key are reported, and
    that the module key's record matches the expected memory record.
    """
    stats = get_stats('redis_40_with_module.rdb')

    self.assertTrue('simplekey' in stats)
    self.assertTrue('foo' in stats)
    expected_record = MemoryRecord(database=0, type='module', key='foo',
                                   bytes=101, encoding='ReJSON-RL', size=1,
                                   len_largest_element=101)
    # assertEquals is a deprecated alias (removed in Python 3.12)
    self.assertEqual(stats['foo'], expected_record)

0 commit comments

Comments
 (0)