"""
This module generates tricky inputs for fuzzing cereggii.AtomicDict.

It focuses on creating objects with non-standard, malicious, or unstable
__hash__ and __eq__ methods to probe the robustness of the hash table's
C implementation.
"""
| 8 | + |
| 9 | +import cereggii |
| 10 | +import itertools |
| 11 | +import random |
| 12 | +import collections.abc |
| 13 | +import sys |
| 14 | + |
# --- 1. Imports from other tricky modules ---
# Sibling sample modules supply additional pre-built tricky objects to use as
# keys/values. They are optional: when the import fails, a warning is written
# to stderr and _HAS_DEPS records that only the locally defined objects are
# available, so this module can still run standalone.
try:
    from fusil.python.samples import tricky_objects, weird_classes  # , tricky_numpy
except ImportError:
    _HAS_DEPS = False
    print(
        "Warning: Sibling tricky modules not found. Key/value variety will be limited.",
        file=sys.stderr,
    )
else:
    # Reached only when the import above succeeded.
    _HAS_DEPS = True
| 28 | + |
| 29 | + |
# --- 2. Malicious Classes for Hashing & Equality Hell ---
# The classes in this section deliberately break the contracts Python expects
# from __hash__ and __eq__, attacking the assumptions a hash table relies on.

# Module-wide source of increasing integers (0, 1, 2, ...). Every call to
# next() on it consumes one value, so values are never handed out twice.
_unstable_hash_counter = itertools.count()


class UnstableHash:
    """
    Key whose hash is different on every request.

    Each call to ``__hash__`` pulls the next value from the module-level
    counter, breaking the rule that an object's hash must stay constant while
    it lives in a hash-based collection. Equality, in contrast, is stable: it
    is based on a counter value captured once at construction time.
    """

    def __init__(self):
        # Captured once so that __eq__ and __repr__ have something stable to
        # work with even though __hash__ keeps changing.
        self._initial_hash = next(_unstable_hash_counter)

    def __repr__(self):
        return f"<UnstableHash initial_hash={self._initial_hash}>"

    def __eq__(self, other):
        # Only another UnstableHash carrying the same captured value is equal.
        if not isinstance(other, UnstableHash):
            return False
        return self._initial_hash == other._initial_hash

    def __hash__(self):
        # A fresh counter value on every call: the hash is never repeated.
        return next(_unstable_hash_counter)
| 60 | + |
| 61 | + |
class AlwaysEqualButRandomHash:
    """
    Object that reports equality with any operand but hashes randomly.

    ``__eq__`` answers True no matter what it is compared to, while
    ``__hash__`` draws a new random integer from the inclusive range
    ``[-sys.maxsize - 1, sys.maxsize]`` on every call. The pairing of
    "equal to everything" with an unpredictable hash is intended to stress
    the table's key-matching and collision-resolution logic.
    """

    def __repr__(self):
        return "<AlwaysEqualButRandomHash>"

    def __eq__(self, other):
        # Unconditionally equal, regardless of what ``other`` is.
        return True

    def __hash__(self):
        lowest = -sys.maxsize - 1
        highest = sys.maxsize
        return random.randint(lowest, highest)
| 77 | + |
| 78 | + |
class AlwaysUnequalConstantHash:
    """
    Object with a fixed hash whose ``__eq__`` always answers False.

    Every instance hashes to the same constant, and an explicit comparison is
    never true (not even against itself; only identity checks performed by
    the caller can match it). Many such keys therefore pile up on one hash
    value, which is meant to force the C implementation through a long probe
    sequence.
    """

    def __repr__(self):
        return f"<AlwaysUnequalConstantHash id={id(self)}>"

    def __eq__(self, other):
        # Never equal, whatever the other operand is.
        return False

    def __hash__(self):
        # Same value for every instance, so all of them collide.
        return 42
| 95 | + |
| 96 | + |
class ExceptionRaiser:
    """
    Base class for objects that raise from ``__hash__`` or ``__eq__``.

    It stores the exception type and message to use and offers ``_raise()``
    for subclasses to call from their special methods, so that C-level
    exception handling during core dict operations can be exercised.

    Args:
        exc_type: Exception class instantiated and raised by ``_raise()``.
        msg: Single argument passed to ``exc_type`` when raising.
    """

    def __init__(self, exc_type=ValueError, msg="fuzzer-induced exception"):
        self.exc_type = exc_type
        self.msg = msg
        # Taken from the shared module counter; used only to tell instances
        # apart in repr().
        self._id = next(_unstable_hash_counter)

    def __repr__(self):
        cls_name = self.__class__.__name__
        exc_name = self.exc_type.__name__
        return f"<{cls_name} raises={exc_name} id={self._id}>"

    def _raise(self):
        """Raise ``self.exc_type`` constructed with ``self.msg``."""
        error = self.exc_type(self.msg)
        raise error
| 116 | + |
| 117 | + |
class HashRaisesException(ExceptionRaiser):
    """Object whose ``__hash__`` raises the configured exception."""

    def __eq__(self, other):
        # Plain identity comparison; not expected to be reached when the
        # container fails while hashing the key first.
        return other is self

    def __hash__(self):
        # Delegates to the base-class helper, which raises.
        self._raise()
| 126 | + |
| 127 | + |
class EqRaisesException(ExceptionRaiser):
    """Object with a fixed hash whose ``__eq__`` raises the configured exception."""

    def __eq__(self, other):
        # Delegates to the base-class helper, which raises.
        self._raise()

    def __hash__(self):
        # Constant, so hashing succeeds and all instances share one hash value.
        return 101
| 136 | + |
| 137 | + |
class EqReturnsWrongType:
    """
    Object with a fixed hash whose ``__eq__`` hands back an arbitrary value.

    The value given at construction is returned unchanged from every equality
    comparison, whether or not it is a bool. This checks how the C code copes
    with unexpected result types from comparisons.

    Args:
        return_value: Object returned as-is by ``__eq__``.
    """

    def __init__(self, return_value):
        self.return_value = return_value

    def __repr__(self):
        return f"<EqReturnsWrongType returns={self.return_value!r}>"

    def __eq__(self, other):
        # ``other`` is ignored; the stored value is the comparison result.
        return self.return_value

    def __hash__(self):
        # Constant hash shared by all instances.
        return 255
| 155 | + |
| 156 | + |
# --- Sanity Check ---
print("Defined malicious classes for hashing and equality.")


# --- 3. Aggregate Tricky Hashable Keys ---
# Name -> object mapping of keys that can be hashed but are built to stress a
# hash table implementation. Fuzzing operations draw their keys from here.

tricky_hashable_keys = dict()

# Five instances each of the unstable-hash and always-equal classes. The dict
# literal is evaluated left to right, so instances are created in the same
# order as their entries appear.
for i in range(5):
    tricky_hashable_keys.update(
        {
            f"unstable_hash_{i}": UnstableHash(),
            f"always_equal_random_hash_{i}": AlwaysEqualButRandomHash(),
        }
    )

# A larger batch of constant-hash objects, so that more of them collide.
for i in range(20):
    tricky_hashable_keys[f"always_unequal_constant_hash_{i}"] = (
        AlwaysUnequalConstantHash()
    )
| 176 | + |
# One hash-raising and one eq-raising instance per exception type; the
# exception's class name becomes part of the key.
for exc in (
    ValueError,
    TypeError,
    AttributeError,
    RecursionError,
    IndexError,
    KeyError,
):
    tricky_hashable_keys.update(
        {
            f"hash_raises_{exc.__name__}": HashRaisesException(exc),
            f"eq_raises_{exc.__name__}": EqRaisesException(exc),
        }
    )
| 188 | + |
# Instances that return non-boolean values from __eq__.
# The position in the list is part of the key: two of the return values
# (0 and 1) share the type name "int", so a key built from the type name alone
# would let the later instance silently overwrite the earlier one and drop a
# test object from the pool.
for i, ret_val in enumerate(
    [None, 0, 1, "not a boolean", (1, 2), AlwaysUnequalConstantHash()]
):
    tricky_hashable_keys[f"eq_returns_{type(ret_val).__name__}_{i}"] = (
        EqReturnsWrongType(ret_val)
    )
| 194 | + |
# Built-in values that are classic hashing edge cases, added in this order.
tricky_hashable_keys.update(
    {
        "none": None,
        "true": True,
        "false": False,
        "float_nan": float("nan"),
        "float_inf": float("inf"),
        "empty_tuple": (),
        "empty_frozenset": frozenset(),
    }
)
| 203 | + |
# Pull every public, hashable-looking object out of the sibling tricky modules
# (only possible when they were importable).
if _HAS_DEPS:
    all_tricky_sources = {
        "tricky_obj": tricky_objects.__dict__,
        "weird_cls": weird_classes.weird_instances,
        # "tricky_np": tricky_numpy.__dict__,
    }
    for source_name, source_dict in all_tricky_sources.items():
        for name, obj in source_dict.items():
            # Skip non-string names and private/dunder entries.
            if not isinstance(name, str) or name.startswith("_"):
                continue
            try:
                if isinstance(obj, collections.abc.Hashable):
                    # Prefix with the source label to keep names distinct
                    # across sources.
                    tricky_hashable_keys[f"{source_name}_{name}"] = obj
            except Exception:
                # Some objects might fail even on isinstance checks
                continue
| 220 | + |
| 221 | + |
# --- 4. Pre-populated Malicious AtomicDict Instances ---
# AtomicDicts that already contain problematic keys and structures, ready to
# be used as targets for fuzzed operations (get, set, delete, reduce, ...).

# "Collision Hell": 50 keys that all share one hash value yet never compare
# equal, meant to force the C implementation through a long probe sequence.
collision_hell_dict = cereggii.AtomicDict()
for i in range(50):
    collision_hell_dict[AlwaysUnequalConstantHash()] = i

# "Unstable Hash": 10 keys whose hash differs on every request.
unstable_hash_dict = cereggii.AtomicDict()
for i in range(10):
    unstable_hash_dict[UnstableHash()] = i

# Registry of the ready-made dicts, keyed by a descriptive name.
tricky_atomic_dicts = {
    "atomicdict_collision_hell": collision_hell_dict,
    "atomicdict_unstable_hash": unstable_hash_dict,
}
| 241 | + |
# "Weird Keys": an AtomicDict filled with a random selection of at most 50 of
# the tricky hashable keys gathered earlier.
weird_keys_dict = cereggii.AtomicDict()
sample_keys = [*tricky_hashable_keys.values()]
random.shuffle(sample_keys)  # random order, so the selection varies per run
for i, key in enumerate(sample_keys[:50]):
    try:
        weird_keys_dict[key] = i
    except Exception:
        # Some keys are expected to fail insertion (for example those whose
        # __hash__ raises); they are simply left out.
        pass
tricky_atomic_dicts["atomicdict_weird_keys"] = weird_keys_dict
| 253 | + |
# "Recursive Dict": an AtomicDict that stores itself as one of its values.
recursive_dict = cereggii.AtomicDict()
recursive_dict["self_ref"] = recursive_dict
tricky_atomic_dicts["atomicdict_self_recursive"] = recursive_dict

# "Cross-Recursive Dicts": a pair of AtomicDicts, each holding the other as a
# value under the key "other".
cross_recursive_dict1, cross_recursive_dict2 = (
    cereggii.AtomicDict(),
    cereggii.AtomicDict(),
)
cross_recursive_dict1["other"] = cross_recursive_dict2
cross_recursive_dict2["other"] = cross_recursive_dict1
tricky_atomic_dicts.update(
    {
        "atomicdict_cross_recursive_1": cross_recursive_dict1,
        "atomicdict_cross_recursive_2": cross_recursive_dict2,
    }
)
| 266 | + |
# An AtomicDict built from a plain Python dict that contains itself. This is
# best effort: if construction fails, the entry is just not registered.
try:
    std_recursive_dict = {}
    std_recursive_dict["self"] = std_recursive_dict
    tricky_atomic_dicts["atomicdict_from_std_recursive"] = cereggii.AtomicDict(
        std_recursive_dict
    )
except Exception:
    # This might raise RecursionError, which is fine
    pass

# --- Sanity Check ---
# Report how many keys and dicts were built while the module was imported.
print(f"Aggregated {len(tricky_hashable_keys)} tricky hashable keys.")
print(
    f"Created {len(tricky_atomic_dicts)} pre-populated malicious AtomicDict instances."
)
0 commit comments