Skip to content

Commit 1be6bfe

Browse files
Ezibenroc and lemire authored
Safe deserialization of bitmaps (#126)
Also upgrade Cython minimum version.

Co-authored-by: Daniel Lemire <[email protected]>
1 parent f719959 commit 1be6bfe

File tree

5 files changed: 47 additions and 22 deletions.

pyroaring/abstract_bitmap.pxi

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,32 @@ try:
1212
except NameError: # python 3
1313
pass
1414

15-
cdef croaring.roaring_bitmap_t *deserialize_ptr(char *buff):
15+
cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff):
1616
cdef croaring.roaring_bitmap_t *ptr
17-
ptr = croaring.roaring_bitmap_portable_deserialize(buff)
17+
cdef const char *reason_failure = NULL
18+
buff_size = len(buff)
19+
ptr = croaring.roaring_bitmap_portable_deserialize_safe(buff, buff_size)
20+
if ptr == NULL:
21+
raise ValueError("Could not deserialize bitmap")
22+
# Validate the bitmap
23+
if not croaring.roaring_bitmap_internal_validate(ptr, &reason_failure):
24+
# If validation fails, free the bitmap and raise an exception
25+
croaring.roaring_bitmap_free(ptr)
26+
raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}")
1827
return ptr
1928

2029
cdef croaring.roaring64_bitmap_t *deserialize64_ptr(bytes buff):
2130
cdef croaring.roaring64_bitmap_t *ptr
31+
cdef const char *reason_failure = NULL
2232
buff_size = len(buff)
23-
bm_size = croaring.roaring64_bitmap_portable_deserialize_size(buff, buff_size)
24-
if bm_size == 0:
25-
raise ValueError("Invalid bitmap serialization")
26-
ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buff, bm_size)
33+
ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buff, buff_size)
34+
if ptr == NULL:
35+
raise ValueError("Could not deserialize bitmap")
36+
# Validate the bitmap
37+
if not croaring.roaring64_bitmap_internal_validate(ptr, &reason_failure):
38+
# If validation fails, free the bitmap and raise an exception
39+
croaring.roaring64_bitmap_free(ptr)
40+
raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}")
2741
return ptr
2842

2943
def _string_rep(bm):
@@ -744,7 +758,7 @@ cdef class AbstractBitMap:
744758

745759

746760
@classmethod
747-
def deserialize(cls, char *buff):
761+
def deserialize(cls, bytes buff):
748762
"""
749763
Generate a bitmap from the given serialization. See AbstractBitMap.serialize for the reverse operation.
750764

pyroaring/croaring.pxd

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,8 @@ cdef extern from "roaring.h":
100100
size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra)
101101
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf)
102102
roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf)
103+
roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes)
104+
bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason)
103105
roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *ra)
104106
bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it)
105107
uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count)
@@ -163,6 +165,7 @@ cdef extern from "roaring.h":
163165
size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf)
164166
size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes)
165167
roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes)
168+
bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason)
166169
roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r)
167170
void roaring64_iterator_free(roaring64_iterator_t *it)
168171
bool roaring64_iterator_has_value(const roaring64_iterator_t *it)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@
8888
version=VERSION,
8989
description='Library for handling efficiently sorted integer sets.',
9090
long_description=long_description,
91-
setup_requires=['cython'],
91+
setup_requires=['cython>=3.0.2'],
9292
url='https://github.com/Ezibenroc/PyRoaringBitMap',
9393
author='Tom Cornebize',
9494
author_email='[email protected]',

test.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import operator
1313
import unittest
1414
import functools
15+
import base64
1516
from typing import TYPE_CHECKING
1617
from collections.abc import Set, Callable, Iterable, Iterator
1718

@@ -886,6 +887,27 @@ def test_pickle_protocol(
886887
assert old_bm == new_bm
887888
self.assert_is_not(old_bm, new_bm)
888889

890+
@given(bitmap_cls)
891+
def test_impossible_deserialization(
892+
self,
893+
cls: type[EitherBitMap],
894+
) -> None:
895+
wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=')
896+
with pytest.raises(ValueError, match='Could not deserialize bitmap'):
897+
bitmap = cls.deserialize(wrong_input)
898+
899+
@given(bitmap_cls)
900+
def test_invalid_deserialization(
901+
self,
902+
cls: type[EitherBitMap],
903+
) -> None:
904+
wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=')
905+
bm = cls(list(range(0, 1000000, 3)))
906+
bitmap_bytes = bm.serialize()
907+
bitmap_bytes = bitmap_bytes[:42] + wrong_input + bitmap_bytes[42:]
908+
with pytest.raises(ValueError, match='Invalid bitmap after deserialization'):
909+
bitmap = cls.deserialize(bitmap_bytes)
910+
889911

890912
class TestStatistics(Util):
891913

tox.ini

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
[tox]
22
envlist =
3-
cython_pre3
43
cython3
54
test_wheel
65

@@ -10,19 +9,6 @@ setenv =
109
PYTHONFAULTHANDLER=1
1110

1211

13-
[testenv:cython_pre3]
14-
deps =
15-
hypothesis
16-
pytest
17-
cython<3.0.0
18-
passenv =
19-
HYPOTHESIS_PROFILE
20-
ROARING_BITSIZE
21-
commands =
22-
py.test -v test.py test_state_machine.py
23-
python cydoctest.py
24-
25-
2612
[testenv:cython3]
2713
deps =
2814
hypothesis

0 commit comments

Comments
 (0)