diff --git a/corp/sampletext.txt b/corp/sampletext.txt
new file mode 100644
index 0000000..bb56fc8
--- /dev/null
+++ b/corp/sampletext.txt
@@ -0,0 +1 @@
+sampletext
\ No newline at end of file
diff --git a/dictionary.txt b/dictionary.txt
new file mode 100644
index 0000000..a0d1588
--- /dev/null
+++ b/dictionary.txt
@@ -0,0 +1,11 @@
+# Just a basic fuzzing dictionary. The http protocols are hardcoded because otherwise it would be hard for the fuzzer to get past certain hardcoded string checks
+"https://"
+"org"
+"www"
+""
+"<"
+">"
+"https"
+"http"
+"@"
+":"
diff --git a/fuzz.sh b/fuzz.sh
new file mode 100755
index 0000000..7553843
--- /dev/null
+++ b/fuzz.sh
@@ -0,0 +1,4 @@
+
+# Just use 10k as max length
+python3 basic_fuzzer.py -dict=dictionary.txt -max_len=10000 -timeout=1 corp/
+
diff --git a/fuzz_alg_comp.sh b/fuzz_alg_comp.sh
new file mode 100755
index 0000000..0fc5f9d
--- /dev/null
+++ b/fuzz_alg_comp.sh
@@ -0,0 +1,4 @@
+
+# Just use 100k as max length
+python3 fuzz_algorithmic_complexity.py -dict=dictionary.txt -max_len=100000 -timeout=1 corp/
+
diff --git a/fuzz_algorithmic_complexity.py b/fuzz_algorithmic_complexity.py
new file mode 100644
index 0000000..9312253
--- /dev/null
+++ b/fuzz_algorithmic_complexity.py
@@ -0,0 +1,39 @@
+import sys
+import atheris
+import mutator # Custom mutator
+
+with atheris.instrument_imports():
+    import fuzzers
+    from django.core.exceptions import SuspiciousOperation
+
+
+def TestOneInput(data):
+    """Run the string-based test selected by the first input byte on the remaining bytes."""
+    if not data: # The first byte chooses the test, so empty inputs cannot be processed.
+        return
+    # The selector is a single byte; the modulo keeps it inside fuzzers.tests_str.
+    choice = data[0] % len(fuzzers.tests_str) # Indexing bytes already yields an int.
+    data = data[1:] # Everything after the selector byte is the payload.
+    func, data_type = fuzzers.tests_str[choice]
+    # Here in the original version we used the fuzz data provider to generate inputs, however in this fork we just use only the functions which take strings.
+    try:
+        data = data.decode("utf-8") # Decode the payload as UTF-8. All the functions should only take string input.
+        func(data)
+    except (UnicodeDecodeError, SuspiciousOperation):
+        # Decode errors and Django's SuspiciousOperation are ignored.
+        return
+    except Exception:
+        print(func, data_type, repr(data)) # Report the failing test and input, then re-raise.
+        raise
+
+def CustomMutator(data, max_size, seed):
+    """Run mutator.mutate and then atheris.Mutate on data, returning at most max_size bytes."""
+    try:
+        res = mutator.mutate(data) # Call custom mutator.
+    except Exception: # Not a bare except: KeyboardInterrupt and SystemExit must still stop the run.
+        res = data # Fall back to the unmodified input.
+    res = atheris.Mutate(res, len(res))
+    return res[:max_size] # Truncate inputs which are too long...
+
+atheris.Setup(sys.argv, TestOneInput, custom_mutator=CustomMutator, internal_libfuzzer=True) # Use the custom mutator
+atheris.Fuzz()
diff --git a/fuzzers.py b/fuzzers.py
index 80368a2..921b7e0 100644
--- a/fuzzers.py
+++ b/fuzzers.py
@@ -411,3 +411,7 @@ def test_forms_UUIDField(inp):
     (test_forms_URLField, str),
     (test_forms_UUIDField, str),
 ]
+
+# These are the tests which only take string inputs
+
+tests_str = [test for test in tests if test[1] is str]
diff --git a/mutator.py b/mutator.py
new file mode 100644
index 0000000..dc41046
--- /dev/null
+++ b/mutator.py
@@ -0,0 +1,128 @@
+import random
+import string
+import math
+import sys
+
+# Custom mutator parameters. Tweak these for your use case.
+
+MAXIMUM_LENGTH = 100_000 # Absolute maximum length of the generated data.
+NEW_DATA_CHANCE = 0.01 # Probability of creating an entirely new string instead of mutating the input
+MAX_REPEAT_COUNT = 10_000 # Maximum amount of repetitions
+MAX_REPEAT_LENGTH = 10000 # Maximum length in bytes of one repeated run
+MAX_REPEAT_COUNT_LINEAR = 10000 # Scale of the linear term in f()
+MIN_REPEAT_COUNT_LINEAR = 200 # Lower bound of the linear term in f()
+MAX_REPEAT_TOKEN_LENGTH = 5 # Maximum length of the string which to repeat.
+MAX_REPEAT_STRING_COUNT = 10 # Exclusive upper bound on the number of repeated runs in a newly generated string
+MAX_SUBSTRING_LENGTH = 10 # Maximum length of the substring picked by get_substr()
+
+def f(x: float) -> float: # Unnormalised weight function (this is assumed to be growing in the period 0 <= x <= 1)
+    return (x + 0.6) ** 3 + max(MIN_REPEAT_COUNT_LINEAR, round(MAX_REPEAT_COUNT_LINEAR * x))
+
+PRECALC = f(1.0) # Precalculated value for faster execution
+
+def dist_function(x: float) -> float: # Normalised weight f(x) / f(1). x is assumed to be between 0 <= x <= 1
+    assert 0 <= x <= 1
+    return f(x) / PRECALC # Value divided by maximum value. (Maximum is assumed to be at x = 1)
+
+def distribution(val: int) -> int: # Returns val scaled by dist_function() of a uniform random sample, rounded. The factor is about 0.02 at x = 0 and 1 at x = 1.
+    return round(val * dist_function(chance()))
+
+def custom_mutator(data, max_size, seed, native_mutator): # Mutates data with mutate() and cuts the result off at max_size. A bytearray input gives a bytearray result.
+    # seed and native_mutator are accepted but not used.
+    convert = isinstance(data, bytearray) # Always bound, so a bytes input no longer raises UnboundLocalError below
+    if convert:
+        data = bytes(data)
+    new_data = mutate(data)
+    if convert:
+        new_data = bytearray(new_data)
+    if len(new_data) >= max_size:
+        return new_data[:max_size] # Just add a cutoff
+    return new_data
+
+def chance() -> float: # Shorthand for a uniform random float in [0, 1)
+    return random.random()
+
+def c(const: float) -> bool: # Rolls a dice and returns true with a probability of "const"
+    return chance() <= const
+
+def rnum(n: int) -> int: # Random integer in [0, n). Returns 0 when that range is empty
+    if n <= 0: # randrange(0, n) raises ValueError for every non-positive n, not only for 0 and -1
+        return 0
+    return random.randrange(0, n)
+
+def stringmult(string_bytes: bytes, val: int) -> bytes: # Repeats string_bytes up to val times, never producing more than MAX_REPEAT_LENGTH bytes
+    if not string_bytes: # Avoid division by zero
+        return string_bytes
+    count = min(MAX_REPEAT_LENGTH // len(string_bytes), val) # Floor division keeps count an int
+    assert isinstance(count, int)
+    out = string_bytes * count
+    assert len(out) <= MAX_REPEAT_LENGTH
+    return out
+
+def rand_ascii_string(n: int) -> bytes: # Generates n random ascii bytes (taken from string.printable)
+    return bytes([ord(random.choice(string.printable)) for _ in range(n)]) # Create array of allowed bytes and convert to bytes
+
+def generate_repeating(n: int) -> bytes: # Generate a random token and repeat it up to n times
+    # The token length comes from distribution(), not from a uniform rnum().
+    return stringmult(rand_ascii_string(distribution(MAX_REPEAT_TOKEN_LENGTH)), n)
+
+def generate_new() -> bytes: # Generate a new ascii string made of concatenated repeated runs.
+    repeat_count = rnum(MAX_REPEAT_STRING_COUNT)
+    out = b"" # Final generated string
+    for _ in range(repeat_count): # Generate "repeat_count" repeating strings.
+        out += generate_repeating(rnum(MAX_REPEAT_COUNT))
+    return out
+
+def get_substr(data: bytes) -> tuple: # Picks a random substring of data. Returns (substring, start index)
+    rand_ind = rnum(len(data)) # The start may be anywhere in data
+    length = min(rnum(len(data) - rand_ind - 1), MAX_SUBSTRING_LENGTH) # The limit applies to the length, not to the start index
+    result = data[rand_ind:rand_ind+length]
+    return result, rand_ind # Return the substring and where it starts
+
+def mutate_existing(data: bytes) -> bytes: # Replaces a random substring with repeated copies of itself, or inserts a repeated random token
+    substr, rand_ind = get_substr(data)
+    if substr:
+        # Just cut out the original substring and then add the multiplied substring in there.
+        rep_count = distribution(MAX_REPEAT_COUNT)
+        multiplication = stringmult(substr, rep_count)
+        data = data[:rand_ind] + multiplication + data[rand_ind+len(substr):]
+        return data
+    # No substring was picked: insert a repeated random token somewhere else.
+    place_index = rnum(len(data)-1)
+    rep_count = distribution(MAX_REPEAT_COUNT)
+    multiplication = stringmult(rand_ascii_string(distribution(MAX_REPEAT_TOKEN_LENGTH)), rep_count)
+    data = data[:place_index] + multiplication + data[place_index:] # Prefix + insertion + suffix, so no original byte is lost or duplicated
+    return data
+
+def mutate(data: bytes) -> bytes: # Main mutator entry point. Returns a mutated version of the data.
+    if c(NEW_DATA_CHANCE): # Create new string.
+        return generate_new()
+    # Mutate existing.
+    return mutate_existing(data)
+
+def fuzz(buf, add_buf, max_size): # For AFL and AFL++. Returns the mutated data as a bytearray of at most max_size bytes
+    # add_buf is accepted but not used.
+    data = bytes(buf) # Convert bytearray to bytes.
+    data = mutate(data)
+    if len(data) > max_size: # Only report when bytes are actually dropped
+        print("Truncating returned fuzz data...\n")
+        print("Orig len is " + str(len(data)) + " . New len is " + str(max_size))
+        data = data[:max_size] # Truncate
+    data = bytearray(data) # Convert bytes back to bytearray.
+    return data
+
+def deinit(): # AFL and AFL++ complain if we do not have this for some reason...
+    pass
+
+if __name__=="__main__": # For testing only. This just checks that the mutator doesn't crash
+    MAX_MUT_COUNT = 2000 # Exclusive upper bound on the number of mutations per round
+    TEST_COUNT = 100000 # Number of rounds to run before exiting
+    # Each round restarts from the same seed input and applies a random number of mutations.
+    for _ in range(TEST_COUNT): # Bounded, so the exit below is reachable
+        MUT_COUNT = rnum(MAX_MUT_COUNT)
+        RESULT = b"paskaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+        for _ in range(MUT_COUNT):
+            RESULT = mutate_existing(RESULT)
+            if len(RESULT) > MAXIMUM_LENGTH: # Bounds check
+                RESULT = RESULT[:MAXIMUM_LENGTH]
+    sys.exit(0) # exit successfully