Skip to content

Commit cd87a07

Browse files
committed
Add the base files for fuzzing cereggii.
1 parent 07f2520 commit cd87a07

15 files changed

+3443
-0
lines changed
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
"""
This module generates tricky inputs for fuzzing cereggii.AtomicDict.

It focuses on creating objects with non-standard, malicious, or unstable
__hash__ and __eq__ methods to probe the robustness of the hash table's
C implementation.
"""
8+
9+
import cereggii
10+
import itertools
11+
import random
12+
import collections.abc
13+
import sys
14+
15+
# --- 1. Imports from other tricky modules ---
16+
# We gather a wide range of pre-existing tricky objects to use as keys/values.
17+
# This is wrapped in try-except to allow this module to run standalone.
18+
try:
    # Only the import itself is guarded, so an unrelated failure cannot be
    # mistaken for "dependency missing".
    from fusil.python.samples import tricky_objects, weird_classes  # , tricky_numpy
except ImportError:
    # The sibling sample modules are optional: without them the key pool is
    # built only from the classes defined in this module.
    _HAS_DEPS = False
    print(
        "Warning: Sibling tricky modules not found. Key/value variety will be limited.",
        file=sys.stderr,
    )
else:
    _HAS_DEPS = True
28+
29+
30+
# --- 2. Malicious Classes for Hashing & Equality Hell ---
31+
# This suite of classes is designed to violate the core contracts of Python's
32+
# hashing and equality, targeting the underlying assumptions of any hash table.
33+
34+
# Shared, monotonically increasing counter. Every next() call yields a fresh
# integer; UnstableHash uses it for its hash values and ExceptionRaiser uses it
# for repr identifiers.
_unstable_hash_counter = itertools.count()


class UnstableHash:
    """
    An object whose hash value changes every time it is requested.
    This violates the rule that an object's hash must be constant during its
    lifetime if it is in a hash-based collection.
    """

    def __init__(self):
        """Record a per-instance identifier taken from the shared counter."""
        # Store the initial hash to have a stable representation, even if the
        # internal hash is unstable.
        self._initial_hash = next(_unstable_hash_counter)

    def __hash__(self):
        """Return the next counter value, so no two calls return the same hash."""
        return next(_unstable_hash_counter)

    def __eq__(self, other):
        """Return True only for UnstableHash instances with the same initial id."""
        return (
            isinstance(other, UnstableHash)
            and self._initial_hash == other._initial_hash
        )

    def __repr__(self):
        """Return a stable representation based on the identifier set in __init__."""
        return f"<UnstableHash initial_hash={self._initial_hash}>"
60+
61+
62+
class AlwaysEqualButRandomHash:
    """
    An object that claims to be equal to everything, but provides a different
    random hash each time. This breaks the rule that objects which compare
    equal must have the same hash: a hash table cannot rely on the hash to find
    the key again, and any equality comparison that does take place reports a
    match.
    """

    def __hash__(self):
        """Return a fresh random integer from the signed range bounded by sys.maxsize."""
        return random.randint(-sys.maxsize - 1, sys.maxsize)

    def __eq__(self, other):
        """Report equality with any object whatsoever."""
        return True

    def __repr__(self):
        """Return a fixed, instance-independent representation."""
        return "<AlwaysEqualButRandomHash>"
77+
78+
79+
class AlwaysUnequalConstantHash:
    """
    An object that is never equal to anything (even itself, unless via identity),
    but always has the same hash. This is designed to create a massive number
    of hash collisions in a single bucket, forcing the C implementation to
    traverse a long probe sequence.
    """

    def __hash__(self):
        """Return the same hash for every instance."""
        return 42  # A classic constant hash value

    def __eq__(self, other):
        """Report inequality with every object, including this one."""
        return False  # Never equal

    def __repr__(self):
        """Return a representation that distinguishes instances by id()."""
        return f"<AlwaysUnequalConstantHash id={id(self)}>"
95+
96+
97+
class ExceptionRaiser:
    """
    A base for objects that raise exceptions from within __hash__ or __eq__.
    This allows us to test C-level exception handling during core dict operations.
    """

    def __init__(self, exc_type=ValueError, msg="fuzzer-induced exception"):
        """
        Configure which exception this object raises.

        Args:
            exc_type: Exception class instantiated by _raise().
            msg: Single argument passed to exc_type when it is instantiated.
        """
        self.exc_type = exc_type
        self.msg = msg
        # A simple unique identifier for repr. It is drawn from the same counter
        # that UnstableHash uses, so creating an instance advances that counter.
        self._id = next(_unstable_hash_counter)

    def _raise(self):
        """Raise the configured exception type with the configured message."""
        raise self.exc_type(self.msg)

    def __repr__(self):
        """Return the concrete class name, the exception name and the instance id."""
        return (
            f"<{self.__class__.__name__} raises={self.exc_type.__name__} id={self._id}>"
        )
116+
117+
118+
class HashRaisesException(ExceptionRaiser):
    """An object that raises an exception when its hash is computed."""

    def __hash__(self):
        """Always raise the configured exception instead of returning a hash."""
        self._raise()

    def __eq__(self, other):
        """Compare by identity only."""
        return self is other  # Should not be reached if hash fails
126+
127+
128+
class EqRaisesException(ExceptionRaiser):
    """An object with a constant hash that raises an exception on equality check."""

    def __hash__(self):
        """Return the same hash for every instance."""
        return 101  # Another constant hash

    def __eq__(self, other):
        """Always raise the configured exception instead of returning a result."""
        self._raise()
136+
137+
138+
class EqReturnsWrongType:
    """
    An object with a constant hash whose __eq__ method returns a non-boolean value.
    This tests the C code's handling of unexpected return types from comparisons.
    """

    def __init__(self, return_value):
        """
        Args:
            return_value: Object handed back verbatim by every __eq__ call.
        """
        self.return_value = return_value

    def __hash__(self):
        """Return the same hash for every instance."""
        return 255  # Yet another constant hash

    def __eq__(self, other):
        """Return the configured object, whatever `other` is."""
        return self.return_value

    def __repr__(self):
        """Return a representation that shows the configured __eq__ result."""
        return f"<EqReturnsWrongType returns={self.return_value!r}>"
155+
156+
157+
# --- Sanity Check ---
158+
print("Defined malicious classes for hashing and equality.")


# --- 3. Aggregate Tricky Hashable Keys ---
# A comprehensive dictionary of objects that are technically hashable but are
# designed to stress the hash table implementation. This serves as a rich
# pool of keys for fuzzing operations. It maps a descriptive name to each
# object.

tricky_hashable_keys = {}

# Instantiate our malicious classes
for i in range(5):
    tricky_hashable_keys[f"unstable_hash_{i}"] = UnstableHash()
    tricky_hashable_keys[f"always_equal_random_hash_{i}"] = AlwaysEqualButRandomHash()
for i in range(20):  # More of these to create collisions
    tricky_hashable_keys[f"always_unequal_constant_hash_{i}"] = (
        AlwaysUnequalConstantHash()
    )

# Instances that raise various exceptions from __hash__ or __eq__
for exc in [
    ValueError,
    TypeError,
    AttributeError,
    RecursionError,
    IndexError,
    KeyError,
]:
    tricky_hashable_keys[f"hash_raises_{exc.__name__}"] = HashRaisesException(exc)
    tricky_hashable_keys[f"eq_raises_{exc.__name__}"] = EqRaisesException(exc)

# Instances that return non-boolean values from __eq__
for ret_val in [None, 0, 1, "not a boolean", (1, 2), AlwaysUnequalConstantHash()]:
    entry_name = f"eq_returns_{type(ret_val).__name__}"
    if entry_name in tricky_hashable_keys:
        # 0 and 1 share the type name "int". Without a suffix the second entry
        # would silently replace the first and one test object would be lost.
        entry_name = f"{entry_name}_{ret_val!r}"
    tricky_hashable_keys[entry_name] = EqReturnsWrongType(ret_val)

# Add fundamental edge-case hashables
tricky_hashable_keys["none"] = None
tricky_hashable_keys["true"] = True
tricky_hashable_keys["false"] = False
tricky_hashable_keys["float_nan"] = float("nan")
tricky_hashable_keys["float_inf"] = float("inf")
tricky_hashable_keys["empty_tuple"] = ()
tricky_hashable_keys["empty_frozenset"] = frozenset()

# Gather all hashable objects from our other tricky modules
if _HAS_DEPS:
    all_tricky_sources = {
        "tricky_obj": tricky_objects.__dict__,
        "weird_cls": weird_classes.weird_instances,
        # "tricky_np": tricky_numpy.__dict__,
    }
    for source_name, source_dict in all_tricky_sources.items():
        for name, obj in source_dict.items():
            # Skip private/dunder names and any non-string keys.
            if isinstance(name, str) and not name.startswith("_"):
                try:
                    if isinstance(obj, collections.abc.Hashable):
                        tricky_hashable_keys[f"{source_name}_{name}"] = obj
                except Exception:
                    # Some objects might fail even on isinstance checks
                    continue
220+
221+
222+
# --- 4. Pre-populated Malicious AtomicDict Instances ---
223+
# A collection of AtomicDicts that are already initialized with problematic
224+
# keys and structures. These serve as ready-made targets for fuzzing operations
225+
# like get, set, delete, reduce, etc.
226+
227+
# Maps a descriptive name to each pre-built AtomicDict.
tricky_atomic_dicts = {}

# "Collision Hell": A dict with many keys that have the same hash but are not equal.
# This forces the C implementation to traverse a long probe sequence.
collision_hell_dict = cereggii.AtomicDict()
for i in range(50):
    collision_hell_dict[AlwaysUnequalConstantHash()] = i
tricky_atomic_dicts["atomicdict_collision_hell"] = collision_hell_dict

# "Unstable Hash": A dict populated with keys whose hash value changes.
unstable_hash_dict = cereggii.AtomicDict()
for i in range(10):
    unstable_hash_dict[UnstableHash()] = i
tricky_atomic_dicts["atomicdict_unstable_hash"] = unstable_hash_dict

# "Weird Keys": A dict populated with a diverse sample of weird but hashable keys.
weird_keys_dict = cereggii.AtomicDict()
# Take a sample of our tricky keys to populate the dict. The shuffle means the
# selection differs between runs unless the random module is seeded.
sample_keys = list(tricky_hashable_keys.values())
random.shuffle(sample_keys)
for i, key in enumerate(sample_keys[:50]):  # Populate with up to 50 weird keys
    try:
        weird_keys_dict[key] = i
    except Exception:  # Some keys might fail insertion, which is fine
        continue
tricky_atomic_dicts["atomicdict_weird_keys"] = weird_keys_dict

# "Recursive Dict": A dict that contains a reference to itself.
recursive_dict = cereggii.AtomicDict()
recursive_dict["self_ref"] = recursive_dict
tricky_atomic_dicts["atomicdict_self_recursive"] = recursive_dict

# "Cross-Recursive Dicts": Two dicts that reference each other.
cross_recursive_dict1 = cereggii.AtomicDict()
cross_recursive_dict2 = cereggii.AtomicDict()
cross_recursive_dict1["other"] = cross_recursive_dict2
cross_recursive_dict2["other"] = cross_recursive_dict1
tricky_atomic_dicts["atomicdict_cross_recursive_1"] = cross_recursive_dict1
tricky_atomic_dicts["atomicdict_cross_recursive_2"] = cross_recursive_dict2

# An AtomicDict initialized from a standard recursive Python dict
try:
    std_recursive_dict = {}
    std_recursive_dict["self"] = std_recursive_dict
    tricky_atomic_dicts["atomicdict_from_std_recursive"] = cereggii.AtomicDict(
        std_recursive_dict
    )
except Exception:
    # Deliberately best-effort: if construction fails, the entry is left out.
    pass  # This might raise RecursionError, which is fine

# --- Sanity Check ---
print(f"Aggregated {len(tricky_hashable_keys)} tricky hashable keys.")
print(
    f"Created {len(tricky_atomic_dicts)} pre-populated malicious AtomicDict instances."
)

0 commit comments

Comments
 (0)