From 5b917033c282d656443257d59dbbca464c4a409b Mon Sep 17 00:00:00 2001 From: "Gabe R." <9311953+marsfan@users.noreply.github.com> Date: Sat, 8 Nov 2025 16:47:28 -0700 Subject: [PATCH 1/4] Add type hints to all python files. Signed-off-by: Gabe R. <9311953+marsfan@users.noreply.github.com> --- xortool/args.py | 35 +++++++++++++++++++-- xortool/charset.py | 4 ++- xortool/libcolors.py | 15 ++++++--- xortool/routine.py | 26 ++++++++------- xortool/tool_main.py | 75 +++++++++++++++++++++++++++----------------- xortool/tool_xor.py | 14 +++++---- 6 files changed, 114 insertions(+), 55 deletions(-) diff --git a/xortool/args.py b/xortool/args.py index 035cd23..d8aced4 100644 --- a/xortool/args.py +++ b/xortool/args.py @@ -1,13 +1,37 @@ +from __future__ import annotations + +from typing import Any, Literal, cast, overload, TypedDict from docopt import docopt from xortool.charset import get_charset +class ParameterDict(TypedDict): + + brute_chars: bool + brute_printable: bool + filename: str + filter_output: bool + frequency_spread: int + input_is_hex: bool + known_key_length: int | None + max_key_length: int | None + most_frequent_char: int | None + text_charset: str | bytes + known_plain: bytes | Literal[False] + threshold: int | None + + class ArgError(Exception): pass +@overload +def parse_char(ch: None) -> None: ... + +@overload +def parse_char(ch: str) -> int: ... -def parse_char(ch): +def parse_char(ch: str | None) -> int | None: """ 'A' or '\x41' or '0x41' or '41' '\x00' or '0x00' or '00' @@ -24,14 +48,19 @@ def parse_char(ch): raise ValueError("Char can be only a char letter or hex") return int(ch, 16) +@overload +def parse_int(i: None) -> None: ... + +@overload +def parse_int(i: str) -> int: ... -def parse_int(i): +def parse_int(i: str | None) -> int | None: if i is None: return None return int(i) -def parse_parameters(doc, version): +def parse_parameters(doc: str, version: str) -> ParameterDict: p = docopt(doc, version=version) p = {k.lstrip("-"): v for k, v in p.items()} try: diff --git a/xortool/charset.py b/xortool/charset.py index 7939554..c3100bb 100644 --- a/xortool/charset.py +++ b/xortool/charset.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import string @@ -20,7 +22,7 @@ class CharsetError(Exception): } -def get_charset(charset): +def get_charset(charset: str | None) -> str | bytes: charset = charset or "printable" if charset in PREDEFINED_CHARSETS: return PREDEFINED_CHARSETS[charset].encode("ascii") diff --git a/xortool/libcolors.py b/xortool/libcolors.py index dc3ffe5..f4329b5 100644 --- a/xortool/libcolors.py +++ b/xortool/libcolors.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import os +from typing import cast BASH_ATTRIBUTES = {"regular": "0", @@ -13,7 +16,7 @@ "blue": "44", "purple": "45", "cyan": "46", "white": "47"} -def _main(): +def _main() -> None: header = color("white", "black", "dark") print() @@ -36,7 +39,7 @@ def _main(): print() -def color(color=None, bgcolor=None, attrs=None): +def color(color: str | None = None, bgcolor: str | None = None, attrs: str | None = None) -> str: if not is_bash(): return "" @@ -61,13 +64,15 @@ def color(color=None, bgcolor=None, attrs=None): return ret + "m" -def is_bash(): +def is_bash() -> bool: return os.environ.get("SHELL", "unknown").endswith("bash") -def _keys_sorted_by_values(adict): +def _keys_sorted_by_values(adict: dict[str, str]) -> list[str]: """Return list of the keys of @adict sorted by values.""" - return sorted(adict, key=adict.get) + # Casting is fine here, we are sorting, adict.get will always + # get a value, and never return "None" + return sorted(adict, key=lambda v: cast(str, adict.get(v))) if __name__ == "__main__": diff --git a/xortool/routine.py b/xortool/routine.py index 4436ee1..c0040f9 100644 --- a/xortool/routine.py +++ b/xortool/routine.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os import sys import string @@ -7,19 +9,19 @@ class MkdirError(Exception): pass -def load_file(filename): +def load_file(filename: str | int) -> bytes: if filename == "-": filename = sys.stdin.fileno() with open(filename, "rb") as fd: return fd.read() -def save_file(filename, data): +def save_file(filename: str, data: bytes) -> None: with open(filename, "wb") as fd: fd.write(data) -def mkdir(dirname): +def mkdir(dirname: str) -> None: if os.path.exists(dirname): return try: @@ -28,7 +30,7 @@ def mkdir(dirname): raise MkdirError(str(err)) -def rmdir(dirname): +def rmdir(dirname: str) -> None: if dirname[-1] == os.sep: dirname = dirname[:-1] if os.path.islink(dirname): @@ -43,30 +45,30 @@ def rmdir(dirname): os.unlink(path) os.rmdir(dirname) -def decode_from_hex(text): - text = text.decode(encoding='ascii', errors='ignore') - only_hex_digits = "".join(c for c in text if c in string.hexdigits) +def decode_from_hex(text: bytes) -> bytes: + text_bytes = text.decode(encoding='ascii', errors='ignore') + only_hex_digits = "".join(c for c in text_bytes if c in string.hexdigits) return bytes.fromhex(only_hex_digits) -def dexor(text, key): +def dexor(text: bytes, key: bytes) -> bytes: mod = len(key) return bytes(key[index % mod] ^ char for index, char in enumerate(text)) -def die(exitMessage, exitCode=1): +def die(exitMessage: str, exitCode: int = 1) -> None: print(exitMessage) sys.exit(exitCode) -def is_linux(): +def is_linux() -> bool: return sys.platform.startswith("linux") -def alphanum(s): +def alphanum(s: str) -> str: lst = list(s) for index, char in enumerate(lst): if char in string.ascii_letters + string.digits: continue - lst[index] = char.hex() + lst[index] = char.encode("ascii").hex() return "".join(lst) diff --git a/xortool/tool_main.py b/xortool/tool_main.py index e891224..0632a05 100755 --- a/xortool/tool_main.py +++ b/xortool/tool_main.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +from __future__ import annotations + +from collections.abc import Iterable +from typing import cast from xortool import __version__ __doc__ = f""" xortool {__version__} @@ -50,6 +54,7 @@ import sys from xortool.args import( + ParameterDict, parse_parameters, ArgError, ) @@ -76,20 +81,21 @@ DIRNAME = 'xortool_out' # here plaintexts will be placed -PARAMETERS = dict() +PARAMETERS: ParameterDict = cast(ParameterDict, dict()) class AnalysisError(Exception): pass -def main(): +def main() -> None: try: PARAMETERS.update(parse_parameters(__doc__, __version__)) ciphertext = get_ciphertext() if not PARAMETERS["known_key_length"]: PARAMETERS["known_key_length"] = guess_key_length(ciphertext) + try_chars: Iterable[int] if PARAMETERS["brute_chars"]: try_chars = range(256) elif PARAMETERS["brute_printable"]: @@ -129,7 +135,7 @@ def main(): # LOADING CIPHERTEXT # ----------------------------------------------------------------------------- -def get_ciphertext(): +def get_ciphertext() -> bytes: """Load ciphertext from a file or stdin and hex-decode if needed""" ciphertext = load_file(PARAMETERS["filename"]) if PARAMETERS["input_is_hex"]: @@ -141,7 +147,7 @@ def get_ciphertext(): # KEYLENGTH GUESSING SECTION # ----------------------------------------------------------------------------- -def guess_key_length(text): +def guess_key_length(text: bytes) -> int: """ Try key lengths from 1 to max_key_length and print local maximums @@ -156,12 +162,20 @@ def guess_key_length(text): return get_max_fitnessed_key_length(fitnesses) -def calculate_fitnesses(text): +def calculate_fitnesses(text: bytes) -> list[tuple[int, float]]: """Calculate fitnesses for each keylen""" prev = 0 pprev = 0 - fitnesses = [] - for key_length in range(1, PARAMETERS["max_key_length"] + 1): + fitnesses: list[tuple[int, float]] = [] + + max_key_len = PARAMETERS["max_key_length"] + if max_key_len: + range_end = max_key_len + 1 + else: + range_end = 0 + + + for key_length in range(1, range_end): fitness = count_equals(text, key_length) # smaller key-length with nearly the same fitness is preferable @@ -180,7 +194,7 @@ def calculate_fitnesses(text): return fitnesses -def print_fitnesses(fitnesses): +def print_fitnesses(fitnesses: list[tuple[int, float]]) -> None: print("The most probable key lengths:") # top sorted by fitness, but print sorted by length @@ -205,11 +219,11 @@ def print_fitnesses(fitnesses): print(fmt.format(key_length, pct, **colors)) -def calculate_fitness_sum(fitnesses): +def calculate_fitness_sum(fitnesses: list[tuple[int, float]]) -> float: return sum([f[1] for f in fitnesses]) -def count_equals(text, key_length): +def count_equals(text: bytes, key_length: int) -> int: """Count equal chars count for each offset and sum them""" equals_count = 0 if key_length >= len(text): @@ -221,11 +235,14 @@ def count_equals(text, key_length): return equals_count -def guess_and_print_divisors(fitnesses): +def guess_and_print_divisors(fitnesses: list[tuple[int, float]]) -> int: """ Prints common divisors and returns the most common divisor """ - divisors_counts = [0] * (PARAMETERS["max_key_length"] + 1) + max_key_len = PARAMETERS["max_key_length"] + if not max_key_len: + max_key_len = 0 + divisors_counts = [0] * (max_key_len + 1) for key_length, fitness in fitnesses: for number in range(3, key_length + 1): if key_length % number == 0: @@ -245,8 +262,8 @@ def guess_and_print_divisors(fitnesses): return ret -def get_max_fitnessed_key_length(fitnesses): - max_fitness = 0 +def get_max_fitnessed_key_length(fitnesses: list[tuple[int, float]]) -> int: + max_fitness = 0.0 max_fitnessed_key_length = 0 for key_length, fitness in fitnesses: if fitness > max_fitness: @@ -255,8 +272,8 @@ def get_max_fitnessed_key_length(fitnesses): return max_fitnessed_key_length -def chars_count_at_offset(text, key_length, offset): - chars_count = dict() +def chars_count_at_offset(text: bytes, key_length: int, offset: int) -> dict[int, int]: + chars_count: dict[int, int] = dict() for pos in range(offset, len(text), key_length): c = text[pos] if c in chars_count: @@ -270,12 +287,12 @@ def chars_count_at_offset(text, key_length, offset): # KEYS GUESSING SECTION # ----------------------------------------------------------------------------- -def guess_probable_keys_for_chars(text, try_chars): +def guess_probable_keys_for_chars(text: bytes, try_chars: Iterable[int]) -> tuple[list[bytes], dict[bytes, int]]: """ Guess keys for list of characters. """ - probable_keys = [] - key_char_used = {} + probable_keys: list[bytes] = [] + key_char_used: dict[bytes, int] = {} for c in try_chars: keys = guess_keys(text, c) @@ -287,13 +304,15 @@ def guess_probable_keys_for_chars(text, try_chars): return probable_keys, key_char_used -def guess_keys(text, most_char): +def guess_keys(text: bytes, most_char: int) -> list[bytes]: """ Generate all possible keys for key length and the most possible char """ key_length = PARAMETERS["known_key_length"] - key_possible_bytes = [[] for _ in range(key_length)] + if not key_length: + key_length = 0 + key_possible_bytes: list[list[int]] = [[] for _ in range(key_length)] for offset in range(key_length): # each byte of key< chars_count = chars_count_at_offset(text, key_length, offset) @@ -305,11 +324,11 @@ def guess_keys(text, most_char): return all_keys(key_possible_bytes) -def all_keys(key_possible_bytes, key_part=(), offset=0): +def all_keys(key_possible_bytes: list[list[int]], key_part: tuple[int, ...] = (), offset: int = 0) -> list[bytes]: """ Produce all combinations of possible key chars """ - keys = [] + keys: list[bytes] = [] if offset >= len(key_possible_bytes): return [bytes(key_part)] for c in key_possible_bytes[offset]: @@ -317,7 +336,7 @@ def all_keys(key_possible_bytes, key_part=(), offset=0): return keys -def print_keys(keys): +def print_keys(keys: list[bytes]) -> None: if not keys: print("No keys guessed!") return @@ -334,7 +353,7 @@ def print_keys(keys): # RETURNS PERCENTAGE OF VALID TEXT CHARS # ----------------------------------------------------------------------------- -def percentage_valid(text): +def percentage_valid(text: bytes) -> float: x = 0.0 for c in text: if c in PARAMETERS["text_charset"]: @@ -346,7 +365,7 @@ def percentage_valid(text): # PRODUCE OUTPUT # ----------------------------------------------------------------------------- -def produce_plaintexts(ciphertext, keys, key_char_used): +def produce_plaintexts(ciphertext: bytes, keys: list[bytes], key_char_used: dict[bytes, int]) -> None: """ Produce plaintext variant for each possible key, creates csv files with keys, percentage of valid @@ -367,7 +386,7 @@ def produce_plaintexts(ciphertext, keys, key_char_used): key_mapping.write("file_name;key_repr\n") perc_mapping.write("file_name;char_used;perc_valid\n") - + if PARAMETERS["threshold"]: threshold_valid = PARAMETERS["threshold"] else: @@ -407,7 +426,7 @@ def produce_plaintexts(ciphertext, keys, key_char_used): print("See files {}, {}".format(fn_key_mapping, fn_perc_mapping)) -def cleanup(): +def cleanup() -> None: if os.path.exists(DIRNAME): rmdir(DIRNAME) diff --git a/xortool/tool_xor.py b/xortool/tool_xor.py index 2abe333..fac93da 100755 --- a/xortool/tool_xor.py +++ b/xortool/tool_xor.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +from __future__ import annotations + from xortool import __version__ __doc__ = f""" xortool-xor {__version__} @@ -20,14 +22,14 @@ import sys -def main(): +def main() -> None: cycle = True newline = True try: opts, _ = getopt.getopt( sys.argv[1:], "ns:r:h:f:", ["cycle", "no-cycle", "nc", "no-newline", "newline"]) - datas = [] + datas: list[bytes] = [] for c, val in opts: if c == "--cycle": cycle = True @@ -51,7 +53,7 @@ def main(): sys.stdout.buffer.write(b"\n") -def xor(args, cycle=True): +def xor(args: list[bytes], cycle: bool = True) -> bytearray: # Sort by len DESC args.sort(key=len, reverse=True) res = bytearray(args.pop(0)) @@ -64,20 +66,20 @@ def xor(args, cycle=True): return res -def from_str(s): +def from_str(s: str) -> bytes: res = b'' for char in s.encode("utf-8").decode("unicode_escape"): res += bytes([ord(char)]) return res -def from_file(s): +def from_file(s: str | int) -> bytes: if s == "-": s = sys.stdin.fileno() return open(s, "rb").read() -def arg_data(opt, s): +def arg_data(opt: str, s: str) -> bytes: if opt == "-s": return from_str(s) if opt == "-r": From 22acfdb76a59672e88d7586e817a0877c2d9eea2 Mon Sep 17 00:00:00 2001 From: "Gabe R." <9311953+marsfan@users.noreply.github.com> Date: Sat, 8 Nov 2025 17:59:22 -0700 Subject: [PATCH 2/4] Fix wrong types in calculate_fitness Signed-off-by: Gabe R. <9311953+marsfan@users.noreply.github.com> --- xortool/tool_main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xortool/tool_main.py b/xortool/tool_main.py index 0632a05..7f56b0c 100755 --- a/xortool/tool_main.py +++ b/xortool/tool_main.py @@ -164,8 +164,8 @@ def guess_key_length(text: bytes) -> int: def calculate_fitnesses(text: bytes) -> list[tuple[int, float]]: """Calculate fitnesses for each keylen""" - prev = 0 - pprev = 0 + prev = 0.0 + pprev = 0.0 fitnesses: list[tuple[int, float]] = [] max_key_len = PARAMETERS["max_key_length"] @@ -176,10 +176,10 @@ def calculate_fitnesses(text: bytes) -> list[tuple[int, float]]: for key_length in range(1, range_end): - fitness = count_equals(text, key_length) + fitness = float(count_equals(text, key_length)) # smaller key-length with nearly the same fitness is preferable - fitness = (float(fitness) / + fitness = float(float(fitness) / (PARAMETERS["max_key_length"] + key_length ** 1.5)) if pprev < prev and prev > fitness: # local maximum From afe1e5369b04c7955fb549c4341ecc2d01e5c702 Mon Sep 17 00:00:00 2001 From: "Gabe R." <9311953+marsfan@users.noreply.github.com> Date: Sat, 8 Nov 2025 18:40:51 -0700 Subject: [PATCH 3/4] Remove unused imports Signed-off-by: Gabe R. <9311953+marsfan@users.noreply.github.com> --- xortool/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xortool/args.py b/xortool/args.py index d8aced4..d600d1b 100644 --- a/xortool/args.py +++ b/xortool/args.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Literal, cast, overload, TypedDict +from typing import Literal, overload, TypedDict from docopt import docopt from xortool.charset import get_charset From f8fcae81508e39926225027a1e99d0111e17266f Mon Sep 17 00:00:00 2001 From: "Gabe R." <9311953+marsfan@users.noreply.github.com> Date: Sat, 8 Nov 2025 18:41:15 -0700 Subject: [PATCH 4/4] Set min python version to 3.8 and add docopt type stubs as dev dependency Signed-off-by: Gabe R. <9311953+marsfan@users.noreply.github.com> --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 169d4d4..71e1c95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ classifiers = [ dependencies = [ "docopt>=0.6.2", ] +requires-python = ">=3.8" [project.scripts] xortool = 'xortool.tool_main:main' @@ -29,6 +30,7 @@ homepage = "http://github.com/hellman/xortool" [tool.poetry.group.dev.dependencies] importlib_metadata = "^4.8" +types-docopt = "^0.6" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"]