diff --git a/.gitignore b/.gitignore index 7eb10d2..90895ba 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ dist build MANIFEST TODO + +__pycache__ +.ruff* +xortool_out \ No newline at end of file diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..3e388a4 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13.2 diff --git a/README.md b/README.md index ba079c2..a5a528f 100644 --- a/README.md +++ b/README.md @@ -1,75 +1,79 @@ -xortool.py -==================== +# xortool.py A tool to do some xor analysis: - - guess the key length (based on count of equal chars) - - guess the key (base on knowledge of most frequent char) +- guess the key length (based on count of equal chars) +- guess the key (base on knowledge of most frequent char) -**Notice:** xortool is now only running on Python 3. The old Python 2 version is accessible at the `py2` branch. The **pip** package has been updated. +**Notice:** xortool is now only running on Python 3. (And Update with `rye` project support and maximum support for Python 3.9-3.13+) ## Installation ```bash -$ pip3 install xortool +pip3 install xortool ``` For development or building this repository, [poetry](https://python-poetry.org/) is needed. - ```bash poetry build pip install dist/xortool*.whl ``` -Usage ---------------------- +## Usage + +```text +$ xortool --help + + Usage: xortool [OPTIONS] [FILENAME] + + A tool to do some xor analysis: + - guess the key length (based on count of equal chars) + - guess the key (base on knowledge of most frequent char) + +╭─ Arguments ───────────────────────────────────────────────────────────────────────────────╮ +│ filename [FILENAME] Input file (or stdin if omitted) │ +╰───────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ─────────────────────────────────────────────────────────────────────────────────╮ +│ --hex -x Input is hex-encoded str │ +│ --key-length -l INTEGER Length of the key [default: None] │ +│ --max-keylen -m INTEGER Maximum key length to probe [default: 65] │ +│ --char -c TEXT Most frequent char (one char or hex code) │ +│ [default: None] │ +│ --brute-chars -b Brute force all possible most frequent chars │ +│ --brute-printable -o Same as -b but will only check printable chars │ +│ --filter-output -f Filter outputs based on the charset │ +│ --text-charset -t TEXT Target text character set │ +│ [default: │ +│ 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP… │ +│ ] │ +│ --known-plaintext -p TEXT Use known plaintext for decoding [default: None] │ +│ --version Show version and exit │ +│ --help Show this message and exit. │ +╰───────────────────────────────────────────────────────────────────────────────────────────╯ ``` -xortool - A tool to do some xor analysis: - - guess the key length (based on count of equal chars) - - guess the key (base on knowledge of most frequent char) - -Usage: - xortool [-x] [-m MAX-LEN] [-f] [-t CHARSET] [FILE] - xortool [-x] [-l LEN] [-c CHAR | -b | -o] [-f] [-t CHARSET] [-p PLAIN] [FILE] - xortool [-x] [-m MAX-LEN| -l LEN] [-c CHAR | -b | -o] [-f] [-t CHARSET] [-p PLAIN] [FILE] - xortool [-h | --help] - xortool --version - -Options: - -x --hex input is hex-encoded str - -l LEN, --key-length=LEN length of the key - -m MAX-LEN, --max-keylen=MAX-LEN maximum key length to probe [default: 65] - -c CHAR, --char=CHAR most frequent char (one char or hex code) - -b --brute-chars brute force all possible most frequent chars - -o --brute-printable same as -b but will only check printable chars - -f --filter-output filter outputs based on the charset - -t CHARSET --text-charset=CHARSET target text character set [default: printable] - -p PLAIN --known-plaintext=PLAIN use known plaintext for decoding - -h --help show this help Notes: - Text character set: - * Pre-defined sets: printable, base32, base64 - * Custom sets: - - a: lowercase chars - - A: uppercase chars - - 1: digits - - !: special chars - - *: printable chars +Text character set: + +- Pre-defined sets: printable, base32, base64 +- Custom sets: + - a: lowercase chars + - A: uppercase chars + - 1: digits + - !: special chars + - \*: printable chars Examples: - xortool file.bin - xortool -l 11 -c 20 file.bin - xortool -x -c ' ' file.hex - xortool -b -f -l 23 -t base64 message.enc - xortool -b -p "xctf{" message.enc -``` -Example 1 ---------------------- +- `xortool file.bin` +- `xortool -l 11 -c 20 file.bin` +- `xortool -x -c ' ' file.hex` +- `xortool -b -f -l 23 -t base64 message.enc` +- `xortool -b -p "xctf{" message.enc` + +## Example 1 ```bash # xor is xortool/xortool-xor @@ -149,8 +153,7 @@ So, if automated decryption fails, you can calibrate: - (`-l`) selected length to see some interesting keys - (`-c`) the most frequent char to produce right plaintext -Example 2 ---------------------- +## Example 2 We are given a message in encoded in Base64 and XORed with an unknown key. @@ -188,8 +191,7 @@ See files filename-key.csv, filename-char_used-perc_valid.csv By filtering the outputs on the character set of Base64, we directly keep the only solution. -Information ---------------------- +## Information Author: hellman diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 9777eeb..0000000 --- a/poetry.lock +++ /dev/null @@ -1,66 +0,0 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. - -[[package]] -name = "docopt" -version = "0.6.2" -description = "Pythonic argument parser, that will make you smile" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, -] - -[[package]] -name = "importlib-metadata" -version = "4.8.3" -description = "Read metadata from Python packages" -category = "dev" -optional = false -python-versions = ">=3.6" -files = [ - {file = "importlib_metadata-4.8.3-py3-none-any.whl", hash = "sha256:65a9576a5b2d58ca44d133c42a241905cc45e34d2c06fd5ba2bafa221e5d7b5e"}, - {file = "importlib_metadata-4.8.3.tar.gz", hash = "sha256:766abffff765960fcc18003801f7044eb6755ffae4521c8e8ce8e83b9c9b0668"}, -] - -[package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} -zipp = ">=0.5" - -[package.extras] -docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] -perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pep517", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-perf (>=0.9.2)"] - -[[package]] -name = "typing-extensions" -version = "4.1.1" -description = "Backported and Experimental Type Hints for Python 3.6+" -category = "dev" -optional = false -python-versions = ">=3.6" -files = [ - {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, - {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, -] - -[[package]] -name = "zipp" -version = "3.6.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" -optional = false -python-versions = ">=3.6" -files = [ - {file = "zipp-3.6.0-py3-none-any.whl", hash = "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"}, - {file = "zipp-3.6.0.tar.gz", hash = "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832"}, -] - -[package.extras] -docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] -testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.6,<4.0" -content-hash = "88cc4de2bfddffb9eaa18471a86bc13f6a3896834e52c1372d912925dad0cecd" diff --git a/pyproject.toml b/pyproject.toml index 8a2db16..82ec1ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ -[tool.poetry] +[project] name = "xortool" -version = "1.0.2" +version = "1.1.0" description = "A tool to analyze multi-byte xor cipher" -authors = ["hellman"] +authors = [{ name = "hellman", email = "hellman@mathstodon.xyz" }] license = "MIT" readme = "README.md" keywords = ["xor", "xortool", "cryptanalysis"] @@ -11,22 +11,25 @@ classifiers = [ 'Intended Audience :: Science/Research', 'Topic :: Security :: Cryptography', ] +requires-python = ">=3.6,<4.0" +dependencies = ["typer>=0.15.2", "rich>=14.0.0"] -[tool.poetry.scripts] +[project.scripts] xortool = 'xortool.tool_main:main' xortool-xor = 'xortool.tool_xor:main' -[tool.poetry.urls] +[project.urls] homepage = "http://github.com/hellman/xortool" -[tool.poetry.dependencies] -python = ">=3.6,<4.0" -docopt = "^0.6.2" +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" -[tool.poetry.dev-dependencies] -docopt = "^0.6.2" -importlib_metadata = "^4.8" +[tool.rye] +managed = true +dev-dependencies = [] +# dev-dependencies = ["importlib_metadata ^4.8"] -[build-system] -requires = ["poetry-core>=1.0.0a5"] -build-backend = "poetry.core.masonry.api" +# [build-system] +# requires = ["rye>=0.21.1"] +# build-backend = "rye.masonry.api" diff --git a/requirements-dev.lock b/requirements-dev.lock new file mode 100644 index 0000000..867ba09 --- /dev/null +++ b/requirements-dev.lock @@ -0,0 +1,29 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. +click==8.1.8 + # via typer +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +pygments==2.19.1 + # via rich +rich==14.0.0 + # via typer + # via xortool +shellingham==1.5.4 + # via typer +typer==0.15.2 + # via xortool +typing-extensions==4.13.2 + # via typer diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 0000000..867ba09 --- /dev/null +++ b/requirements.lock @@ -0,0 +1,29 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. +click==8.1.8 + # via typer +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +pygments==2.19.1 + # via rich +rich==14.0.0 + # via typer + # via xortool +shellingham==1.5.4 + # via typer +typer==0.15.2 + # via xortool +typing-extensions==4.13.2 + # via typer diff --git a/xortool/__init__.py b/xortool/__init__.py index e3f40c3..f35ac65 100644 --- a/xortool/__init__.py +++ b/xortool/__init__.py @@ -1,3 +1,3 @@ -from importlib_metadata import version +from importlib.metadata import version -__version__ = version(__package__) +__version__ = version(__package__ or "xortool") diff --git a/xortool/args.py b/xortool/args.py index 78587d5..cc26818 100644 --- a/xortool/args.py +++ b/xortool/args.py @@ -1,8 +1,3 @@ -from docopt import docopt - -from xortool.charset import get_charset - - class ArgError(Exception): pass @@ -29,24 +24,3 @@ def parse_int(i): if i is None: return None return int(i) - - -def parse_parameters(doc, version): - p = docopt(doc, version=version) - p = {k.lstrip("-"): v for k, v in p.items()} - try: - return { - "brute_chars": bool(p["brute-chars"]), - "brute_printable": bool(p["brute-printable"]), - "filename": p["FILE"] if p["FILE"] else "-", # stdin by default - "filter_output": bool(p["filter-output"]), - "frequency_spread": 0, # to be removed - "input_is_hex": bool(p["hex"]), - "known_key_length": parse_int(p["key-length"]), - "max_key_length": parse_int(p["max-keylen"]), - "most_frequent_char": parse_char(p["char"]), - "text_charset": get_charset(p["text-charset"]), - "known_plain": p["known-plaintext"].encode() if p["known-plaintext"] else False, - } - except ValueError as err: - raise ArgError(str(err)) diff --git a/xortool/charset.py b/xortool/charset.py index 7939554..6d395c6 100644 --- a/xortool/charset.py +++ b/xortool/charset.py @@ -1,4 +1,5 @@ import string +from typing import Optional class CharsetError(Exception): @@ -14,13 +15,13 @@ class CharsetError(Exception): } PREDEFINED_CHARSETS = { - "base32": CHARSETS["A"] + "234567=", - "base64": CHARSETS["a"] + CHARSETS["A"] + CHARSETS["1"] + "/+=", + "base32": CHARSETS["A"] + "234567=", + "base64": CHARSETS["a"] + CHARSETS["A"] + CHARSETS["1"] + "/+=", "printable": CHARSETS["*"], } -def get_charset(charset): +def get_charset(charset: Optional[str]): charset = charset or "printable" if charset in PREDEFINED_CHARSETS: return PREDEFINED_CHARSETS[charset].encode("ascii") diff --git a/xortool/colors.py b/xortool/colors.py deleted file mode 100644 index cb0d313..0000000 --- a/xortool/colors.py +++ /dev/null @@ -1,30 +0,0 @@ -from xortool.libcolors import color - -C_RESET = color() -C_FATAL = color("red") -C_WARN = color("yellow") - -C_KEYLEN = color("green") -C_PROB = color("white", attrs="") -C_BEST_KEYLEN = color("green", attrs="bold") -C_BEST_PROB = color("white", attrs="bold") - -C_DIV = color(attrs="bold") - -C_KEY = color("red", attrs="bold") -C_BOLD = color(attrs="bold") -C_COUNT = color("yellow", attrs="bold") - -COLORS = { - 'C_RESET': C_RESET, - 'C_FATAL': C_FATAL, - 'C_WARN': C_WARN, - 'C_KEYLEN': C_KEYLEN, - 'C_PROB': C_PROB, - 'C_BEST_KEYLEN': C_BEST_KEYLEN, - 'C_BEST_PROB': C_BEST_PROB, - 'C_DIV': C_DIV, - 'C_KEY': C_KEY, - 'C_BOLD': C_BOLD, - 'C_COUNT': C_COUNT, -} diff --git a/xortool/libcolors.py b/xortool/libcolors.py deleted file mode 100644 index dc3ffe5..0000000 --- a/xortool/libcolors.py +++ /dev/null @@ -1,74 +0,0 @@ -import os - - -BASH_ATTRIBUTES = {"regular": "0", - "bold": "1", "underline": "4", "strike": "9", - "light": "1", "dark": "2", - "invert": "7"} # invert bg and fg - -BASH_COLORS = {"black": "30", "red": "31", "green": "32", "yellow": "33", - "blue": "34", "purple": "35", "cyan": "36", "white": "37"} - -BASH_BGCOLORS = {"black": "40", "red": "41", "green": "42", "yellow": "43", - "blue": "44", "purple": "45", "cyan": "46", "white": "47"} - - -def _main(): - header = color("white", "black", "dark") - print() - - print(header + " " + "Colors and backgrounds: " + color()) - for c in _keys_sorted_by_values(BASH_COLORS): - c1 = color(c) - c2 = color("white" if c != "white" else "black", bgcolor=c) - print((c.ljust(10) + - c1 + "colored text" + color() + " " + - c2 + "background" + color())) - print() - - print(header + " " + "Attributes: " + color()) - for c in _keys_sorted_by_values(BASH_ATTRIBUTES): - c1 = color("red", attrs=c) - c2 = color("white", attrs=c) - print((c.ljust(13) + - c1 + "red text" + color() + " " + - c2 + "white text" + color())) - print() - - -def color(color=None, bgcolor=None, attrs=None): - if not is_bash(): - return "" - - ret = "\x1b[0" - if attrs: - for attr in attrs.lower().split(): - attr = attr.strip(",+|") - if attr not in BASH_ATTRIBUTES: - raise ValueError("Unknown color attribute: " + attr) - ret += ";" + BASH_ATTRIBUTES[attr] - - if color: - if color not in BASH_COLORS: - raise ValueError("Unknown color: " + color) - ret += ";" + BASH_COLORS[color] - - if bgcolor: - if bgcolor not in BASH_BGCOLORS: - raise ValueError("Unknown background color: " + bgcolor) - ret += ";" + BASH_BGCOLORS[bgcolor] - - return ret + "m" - - -def is_bash(): - return os.environ.get("SHELL", "unknown").endswith("bash") - - -def _keys_sorted_by_values(adict): - """Return list of the keys of @adict sorted by values.""" - return sorted(adict, key=adict.get) - - -if __name__ == "__main__": - _main() diff --git a/xortool/routine.py b/xortool/routine.py index 4436ee1..16e81cf 100644 --- a/xortool/routine.py +++ b/xortool/routine.py @@ -1,6 +1,6 @@ import os -import sys import string +import sys class MkdirError(Exception): @@ -34,7 +34,7 @@ def rmdir(dirname): if os.path.islink(dirname): return # do not clear link - we can get out of dir for f in os.listdir(dirname): - if f in ('.', '..'): + if f in (".", ".."): continue path = dirname + os.sep + f if os.path.isdir(path): @@ -43,8 +43,9 @@ def rmdir(dirname): os.unlink(path) os.rmdir(dirname) + def decode_from_hex(text): - text = text.decode(encoding='ascii', errors='ignore') + text = text.decode(encoding="ascii", errors="ignore") only_hex_digits = "".join(c for c in text if c in string.hexdigits) return bytes.fromhex(only_hex_digits) diff --git a/xortool/tool_main.py b/xortool/tool_main.py index 5620d3d..89f391e 100755 --- a/xortool/tool_main.py +++ b/xortool/tool_main.py @@ -1,171 +1,70 @@ #!/usr/bin/env python3 -from xortool import __version__ -__doc__ = f""" -xortool {__version__} - A tool to do some xor analysis: - - guess the key length (based on count of equal chars) - - guess the key (base on knowledge of most frequent char) - -Usage: - xortool [-x] [-m MAX-LEN] [-f] [-t CHARSET] [FILE] - xortool [-x] [-l LEN] [-c CHAR | -b | -o] [-f] [-t CHARSET] [-p PLAIN] [FILE] - xortool [-x] [-m MAX-LEN| -l LEN] [-c CHAR | -b | -o] [-f] [-t CHARSET] [-p PLAIN] [FILE] - xortool [-h | --help] - xortool --version - -Options: - -x --hex input is hex-encoded str - -l LEN, --key-length=LEN length of the key - -m MAX-LEN, --max-keylen=MAX-LEN maximum key length to probe [default: 65] - -c CHAR, --char=CHAR most frequent char (one char or hex code) - -b --brute-chars brute force all possible most frequent chars - -o --brute-printable same as -b but will only check printable chars - -f --filter-output filter outputs based on the charset - -t CHARSET --text-charset=CHARSET target text character set [default: printable] - -p PLAIN --known-plaintext=PLAIN use known plaintext for decoding - -h --help show this help - -Notes: - Text character set: - * Pre-defined sets: printable, base32, base64 - * Custom sets: - - a: lowercase chars - - A: uppercase chars - - 1: digits - - !: special chars - - *: printable chars - -Examples: - xortool file.bin - xortool -l 11 -c 20 file.bin - xortool -x -c ' ' file.hex - xortool -b -f -l 23 -t base64 message.enc -""" - -from operator import itemgetter import os import string import sys +from operator import itemgetter +from typing import Annotated, Optional -from xortool.args import ( - parse_parameters, - ArgError, -) -from xortool.charset import CharsetError -from xortool.colors import ( - COLORS, - C_BEST_KEYLEN, - C_BEST_PROB, - C_FATAL, - C_KEY, - C_RESET, - C_WARN, -) +import typer +from rich.console import Console +from rich.style import Style + +from xortool import __version__ +from xortool.args import ArgError +from xortool.charset import CharsetError, get_charset from xortool.routine import ( + MkdirError, decode_from_hex, dexor, die, load_file, mkdir, rmdir, - MkdirError, ) - -DIRNAME = 'xortool_out' # here plaintexts will be placed +app = typer.Typer(add_completion=False) +DIRNAME = "xortool_out" PARAMETERS = dict() +console = Console() +STYLE_BEST_KEYLEN = Style(color="cyan", bold=True) +STYLE_BEST_PROB = Style(color="magenta", bold=True) +STYLE_FATAL = Style(color="red", bold=True) +STYLE_KEY = Style(color="yellow", bold=True) +STYLE_WARN = Style(color="yellow") +STYLE_COUNT = Style(color="green", bold=True) +STYLE_KEYLEN = Style(color="blue", bold=True) +STYLE_PROB = Style(color="magenta") +STYLE_RESET = Style() # default + class AnalysisError(Exception): pass -def main(): - try: - PARAMETERS.update(parse_parameters(__doc__, __version__)) - ciphertext = get_ciphertext() - if not PARAMETERS["known_key_length"]: - PARAMETERS["known_key_length"] = guess_key_length(ciphertext) - - if PARAMETERS["brute_chars"]: - try_chars = range(256) - elif PARAMETERS["brute_printable"]: - try_chars = map(ord, string.printable) - elif PARAMETERS["most_frequent_char"] is not None: - try_chars = [PARAMETERS["most_frequent_char"]] - else: - die(C_WARN + - "Most possible char is needed to guess the key!" + - C_RESET) - - (probable_keys, - key_char_used) = guess_probable_keys_for_chars(ciphertext, try_chars) - - print_keys(probable_keys) - produce_plaintexts(ciphertext, probable_keys, key_char_used) - - except AnalysisError as err: - print(C_FATAL + "[ERROR] Analysis error:\n\t", err, C_RESET) - except ArgError as err: - print(C_FATAL + "[ERROR] Bad argument:\n\t", err, C_RESET) - except CharsetError as err: - print(C_FATAL + "[ERROR] Bad charset:\n\t", err, C_RESET) - except IOError as err: - print(C_FATAL + "[ERROR] Can't load file:\n\t", err, C_RESET) - except MkdirError as err: - print(C_FATAL + "[ERROR] Can't create directory:\n\t", err, C_RESET) - except UnicodeDecodeError as err: - print(C_FATAL + "[ERROR] Input is not hex:\n\t", err, C_RESET) - else: - return - cleanup() - sys.exit(1) - +def cleanup(): + if os.path.exists(DIRNAME): + rmdir(DIRNAME) -# ----------------------------------------------------------------------------- -# LOADING CIPHERTEXT -# ----------------------------------------------------------------------------- def get_ciphertext(): - """Load ciphertext from a file or stdin and hex-decode if needed""" ciphertext = load_file(PARAMETERS["filename"]) if PARAMETERS["input_is_hex"]: ciphertext = decode_from_hex(ciphertext) return ciphertext -# ----------------------------------------------------------------------------- -# KEYLENGTH GUESSING SECTION -# ----------------------------------------------------------------------------- - -def guess_key_length(text): - """ - Try key lengths from 1 to max_key_length and print local maximums - - Set key_length to the most possible if it's not set by user. - """ - fitnesses = calculate_fitnesses(text) - if not fitnesses: - raise AnalysisError("No candidates for key length found! Too small file?") - - print_fitnesses(fitnesses) - guess_and_print_divisors(fitnesses) - return get_max_fitnessed_key_length(fitnesses) - - def calculate_fitnesses(text): - """Calculate fitnesses for each keylen""" prev = 0 pprev = 0 fitnesses = [] + key_length = 1 # Initialize key_length for key_length in range(1, PARAMETERS["max_key_length"] + 1): fitness = count_equals(text, key_length) - # smaller key-length with nearly the same fitness is preferable - fitness = (float(fitness) / - (PARAMETERS["max_key_length"] + key_length ** 1.5)) + fitness = float(fitness) / (PARAMETERS["max_key_length"] + key_length**1.5) - if pprev < prev and prev > fitness: # local maximum + if pprev < prev and prev > fitness: fitnesses += [(key_length - 1, prev)] pprev = prev @@ -177,51 +76,18 @@ def calculate_fitnesses(text): return fitnesses -def print_fitnesses(fitnesses): - print("The most probable key lengths:") - - # top sorted by fitness, but print sorted by length - fitnesses.sort(key=itemgetter(1), reverse=True) - top10 = fitnesses[:10] - best_fitness = top10[0][1] - top10.sort(key=itemgetter(0)) - - fitness_sum = calculate_fitness_sum(top10) - fmt = "{C_KEYLEN}{:" + str(len(str(max(i[0] for i in top10)))) + \ - "d}{C_RESET}: {C_PROB}{:5.1f}%{C_RESET}" - - best_colors = COLORS.copy() - best_colors.update({ - 'C_KEYLEN': C_BEST_KEYLEN, - 'C_PROB': C_BEST_PROB, - }) - - for key_length, fitness in top10: - colors = best_colors if fitness == best_fitness else COLORS - pct = round(100 * fitness * 1.0 / fitness_sum, 1) - print(fmt.format(key_length, pct, **colors)) - - -def calculate_fitness_sum(fitnesses): - return sum([f[1] for f in fitnesses]) - - def count_equals(text, key_length): - """Count equal chars count for each offset and sum them""" equals_count = 0 if key_length >= len(text): return 0 for offset in range(key_length): chars_count = chars_count_at_offset(text, key_length, offset) - equals_count += max(chars_count.values()) - 1 # why -1? don't know + equals_count += max(chars_count.values()) - 1 return equals_count def guess_and_print_divisors(fitnesses): - """ - Prints common divisors and returns the most common divisor - """ divisors_counts = [0] * (PARAMETERS["max_key_length"] + 1) for key_length, fitness in fitnesses: for number in range(3, key_length + 1): @@ -231,10 +97,11 @@ def guess_and_print_divisors(fitnesses): limit = 3 ret = 2 - fmt = "Key-length can be {C_DIV}{:d}*n{C_RESET}" for number, divisors_count in enumerate(divisors_counts): if divisors_count == max_divisors: - print(fmt.format(number, **COLORS)) + console.print( + f"Key-length can be [bold blue]{number}[/]", style=STYLE_KEYLEN + ) ret = number limit -= 1 if limit == 0: @@ -263,14 +130,7 @@ def chars_count_at_offset(text, key_length, offset): return chars_count -# ----------------------------------------------------------------------------- -# KEYS GUESSING SECTION -# ----------------------------------------------------------------------------- - def guess_probable_keys_for_chars(text, try_chars): - """ - Guess keys for list of characters. - """ probable_keys = [] key_char_used = {} @@ -285,14 +145,10 @@ def guess_probable_keys_for_chars(text, try_chars): def guess_keys(text, most_char): - """ - Generate all possible keys for key length - and the most possible char - """ key_length = PARAMETERS["known_key_length"] key_possible_bytes = [[] for _ in range(key_length)] - for offset in range(key_length): # each byte of key< + for offset in range(key_length): chars_count = chars_count_at_offset(text, key_length, offset) max_count = max(chars_count.values()) for char in chars_count: @@ -303,9 +159,6 @@ def guess_keys(text, most_char): def all_keys(key_possible_bytes, key_part=(), offset=0): - """ - Produce all combinations of possible key chars - """ keys = [] if offset >= len(key_possible_bytes): return [bytes(key_part)] @@ -316,22 +169,18 @@ def all_keys(key_possible_bytes, key_part=(), offset=0): def print_keys(keys): if not keys: - print("No keys guessed!") + console.print("No keys guessed!", style=STYLE_WARN) return - fmt = "{C_COUNT}{:d}{C_RESET} possible key(s) of length {C_COUNT}{:d}{C_RESET}:" - print(fmt.format(len(keys), len(keys[0]), **COLORS)) + fmt = "[green]{:d}[/] possible key(s) of length [green]{:d}[/]:" + console.print(fmt.format(len(keys), len(keys[0])), style=STYLE_COUNT) for key in keys[:5]: - print(C_KEY + repr(key)[2:-1] + C_RESET) + console.print(repr(key)[2:-1], style=STYLE_KEY) if len(keys) > 10: - print("...") + console.print("...") -# ----------------------------------------------------------------------------- -# RETURNS PERCENTAGE OF VALID TEXT CHARS -# ----------------------------------------------------------------------------- - -def percentage_valid(text): +def percentage_valid(text: bytes): x = 0.0 for c in text: if c in PARAMETERS["text_charset"]: @@ -339,26 +188,14 @@ def percentage_valid(text): return x / len(text) -# ----------------------------------------------------------------------------- -# PRODUCE OUTPUT -# ----------------------------------------------------------------------------- - def produce_plaintexts(ciphertext, keys, key_char_used): - """ - Produce plaintext variant for each possible key, - creates csv files with keys, percentage of valid - characters and used most frequent character - """ cleanup() mkdir(DIRNAME) - # this is split up in two files since the - # key can contain all kinds of characters - fn_key_mapping = "filename-key.csv" fn_perc_mapping = "filename-char_used-perc_valid.csv" - key_mapping = open(os.path.join(DIRNAME, fn_key_mapping), "w") + key_mapping = open(os.path.join(DIRNAME, fn_key_mapping), "w") perc_mapping = open(os.path.join(DIRNAME, fn_perc_mapping), "w") key_mapping.write("file_name;key_repr\n") @@ -373,36 +210,200 @@ def produce_plaintexts(ciphertext, keys, key_char_used): file_name = os.path.join(DIRNAME, key_index + ".out") dexored = dexor(ciphertext, key) - # ignore saving file when known plain is provided and output doesn't contain it if PARAMETERS["known_plain"] and PARAMETERS["known_plain"] not in dexored: continue perc = round(100 * percentage_valid(dexored)) if perc > threshold_valid: count_valid += 1 key_mapping.write("{};{}\n".format(file_name, key_repr)) - perc_mapping.write("{};{};{}\n".format(file_name, - repr(key_char_used[key]), - perc)) - if not PARAMETERS["filter_output"] or \ - (PARAMETERS["filter_output"] and perc > threshold_valid): + perc_mapping.write( + "{};{};{}\n".format(file_name, repr(key_char_used[key]), perc) + ) + if not PARAMETERS["filter_output"] or ( + PARAMETERS["filter_output"] and perc > threshold_valid + ): f = open(file_name, "wb") f.write(dexored) f.close() key_mapping.close() perc_mapping.close() - fmt = "Found {C_COUNT}{:d}{C_RESET} plaintexts with {C_COUNT}{:d}{C_RESET}%+ valid characters" - msg = fmt.format(count_valid, round(threshold_valid), **COLORS) + msg = f"Found [green]{count_valid}[/] plaintexts with [green]{round(threshold_valid)}[/]%+ valid characters" if PARAMETERS["known_plain"]: - msg += " which contained '{}'".format(PARAMETERS["known_plain"].decode('ascii')) - print(msg) - print("See files {}, {}".format(fn_key_mapping, fn_perc_mapping)) + msg += f" which contained '{PARAMETERS['known_plain'].decode('ascii')}'" + console.print(msg) + console.print(f"See files {fn_key_mapping}, {fn_perc_mapping}") + + +def cli_main( + filename: Annotated[ + Optional[str], + typer.Argument(help="Input file (or stdin if omitted)", show_default=False), + ] = None, + hex: Annotated[ + bool, typer.Option("--hex", "-x", help="Input is hex-encoded str") + ] = False, + key_length: Annotated[ + Optional[int], typer.Option("--key-length", "-l", help="Length of the key") + ] = None, + max_keylen: Annotated[ + int, typer.Option("--max-keylen", "-m", help="Maximum key length to probe") + ] = 65, + char: Annotated[ + Optional[str], + typer.Option("--char", "-c", help="Most frequent char (one char or hex code)"), + ] = None, + brute_chars: Annotated[ + bool, + typer.Option( + "--brute-chars", "-b", help="Brute force all possible most frequent chars" + ), + ] = False, + brute_printable: Annotated[ + bool, + typer.Option( + "--brute-printable", + "-o", + help="Same as -b but will only check printable chars", + ), + ] = False, + filter_output: Annotated[ + bool, + typer.Option( + "--filter-output", "-f", help="Filter outputs based on the charset" + ), + ] = False, + text_charset: Annotated[ + Optional[str], + typer.Option( + "--text-charset", + "-t", + help="""Target text character set + +- Custom sets: + + - a: lowercase chars + + - A: uppercase chars + + - 1: digits + + - !: special chars + + - *: printable chars""", + ), + ] = None, + known_plain: Annotated[ + Optional[str], + typer.Option( + "--known-plaintext", "-p", help="Use known plaintext for decoding" + ), + ] = None, + version: Annotated[ + bool, typer.Option("--version", help="Show version and exit") + ] = False, +): + """ + A tool to do some xor analysis: + - guess the key length (based on count of equal chars) -def cleanup(): - if os.path.exists(DIRNAME): - rmdir(DIRNAME) + - guess the key (base on knowledge of most frequent char) + """ + if version: + typer.echo(__version__) + raise typer.Exit() + + PARAMETERS.clear() + PARAMETERS["filename"] = filename + PARAMETERS["input_is_hex"] = hex + PARAMETERS["known_key_length"] = key_length + PARAMETERS["max_key_length"] = max_keylen + PARAMETERS["most_frequent_char"] = ( + ord(char) if char and len(char) == 1 else (int(char, 16) if char else None) + ) + PARAMETERS["brute_chars"] = brute_chars + PARAMETERS["brute_printable"] = brute_printable + PARAMETERS["filter_output"] = filter_output + PARAMETERS["text_charset"] = get_charset(text_charset) + PARAMETERS["known_plain"] = known_plain.encode() if known_plain else None + + try: + ciphertext = get_ciphertext() + if not PARAMETERS["known_key_length"]: + PARAMETERS["known_key_length"] = guess_key_length(ciphertext) + try_chars = [PARAMETERS["most_frequent_char"]] + if PARAMETERS["brute_chars"]: + try_chars = range(256) + elif PARAMETERS["brute_printable"]: + try_chars = map(ord, string.printable) + elif PARAMETERS["most_frequent_char"] is not None: + try_chars = [PARAMETERS["most_frequent_char"]] + else: + console.print( + "Most possible char is needed to guess the key!", style=STYLE_WARN + ) + die("Most possible char is needed to guess the key!") + try_chars = [PARAMETERS["most_frequent_char"]] + + (probable_keys, key_char_used) = guess_probable_keys_for_chars( + ciphertext, try_chars + ) + + print_keys(probable_keys) + produce_plaintexts(ciphertext, probable_keys, key_char_used) + # [TODO] + except AnalysisError as err: + console.print(f"[ERROR] Analysis error:\n\t{err}", style=STYLE_FATAL) + except ArgError as err: + console.print(f"[ERROR] Bad argument:\n\t{err}", style=STYLE_FATAL) + except CharsetError as err: + console.print(f"[ERROR] Bad charset:\n\t{err}", style=STYLE_FATAL) + except IOError as err: + console.print(f"[ERROR] Can't load file:\n\t{err}", style=STYLE_FATAL) + except MkdirError as err: + console.print(f"[ERROR] Can't create directory:\n\t{err}", style=STYLE_FATAL) + except UnicodeDecodeError as err: + console.print(f"[ERROR] Input is not hex:\n\t{err}", style=STYLE_FATAL) + else: + return + cleanup() + raise typer.Exit(1) -if __name__ == "__main__": - main() +def guess_key_length(text): + fitnesses = calculate_fitnesses(text) + if not fitnesses: + raise AnalysisError("No candidates for key length found! Too small file?") + + print_fitnesses(fitnesses) + guess_and_print_divisors(fitnesses) + return get_max_fitnessed_key_length(fitnesses) + + +def print_fitnesses(fitnesses): + console.print("The most probable key lengths:") + + fitnesses.sort(key=itemgetter(1), reverse=True) + top10 = fitnesses[:10] + best_fitness = top10[0][1] + top10.sort(key=itemgetter(0)) + + fitness_sum = calculate_fitness_sum(top10) + width = len(str(max(i[0] for i in top10))) + for key_length, fitness in top10: + style_keylen = STYLE_BEST_KEYLEN if fitness == best_fitness else STYLE_KEYLEN + style_prob = STYLE_BEST_PROB if fitness == best_fitness else STYLE_PROB + pct = round(100 * fitness * 1.0 / fitness_sum, 1) + console.print(f"{key_length:>{width}d}: ", style=style_keylen, end="") + console.print(f"{pct:5.1f}%", style=style_prob) + + +def calculate_fitness_sum(fitnesses): + return sum([f[1] for f in fitnesses]) + + +def main(): + if len(sys.argv) == 1: + sys.argv.append("--help") + typer.run(cli_main) diff --git a/xortool/tool_xor.py b/xortool/tool_xor.py index 2abe333..32f471b 100755 --- a/xortool/tool_xor.py +++ b/xortool/tool_xor.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 from xortool import __version__ + __doc__ = f""" xortool-xor {__version__} xor strings @@ -25,8 +26,10 @@ def main(): newline = True try: opts, _ = getopt.getopt( - sys.argv[1:], "ns:r:h:f:", - ["cycle", "no-cycle", "nc", "no-newline", "newline"]) + sys.argv[1:], + "ns:r:h:f:", + ["cycle", "no-cycle", "nc", "no-newline", "newline"], + ) datas = [] for c, val in opts: if c == "--cycle": @@ -65,7 +68,7 @@ def xor(args, cycle=True): def from_str(s): - res = b'' + res = b"" for char in s.encode("utf-8").decode("unicode_escape"): res += bytes([ord(char)]) return res @@ -89,5 +92,5 @@ def arg_data(opt, s): raise getopt.GetoptError("unknown option -%s" % opt) -if __name__ == '__main__': +if __name__ == "__main__": main()