alephdata · erdgeist · Aug 22, 2022 · Aug 23, 2022 · Aug 27, 2022 · Aug 27, 2022
diff --git a/crodump/Database.py b/crodump/Database.py
@@ -171,7 +171,7 @@ def enumerate_tables(self, files=False):
             dbdef = self.decode_db_definition(dbinfo[1:])
         except Exception as e:
             print("ERROR decoding db definition: %s" % e)
-            print("This could possibly mean that you need to try with the --strucrack option")
+            print("This could possibly mean that you need to try     crodump strucrack     to deduct the database key first")
             return
 
         for k, v in dbdef.items():
@@ -200,6 +200,7 @@ def enumerate_records(self, table):
                     print("Record %d too short: -- %s" % (i+1, ashex(data)), file=stderr)
                 except Exception as e:
                     print("Record %d broken: ERROR '%s' -- %s" % (i+1, e, ashex(data)), file=stderr)
+            del data
 
     def enumerate_files(self, table):
         """

diff --git a/crodump/croconvert.py b/crodump/croconvert.py
@@ -59,6 +59,9 @@ def csv_output(kod, args):
 
                 filereferences.extend([field for field in record.fields if field.typ == 6])
 
+    if args.nofiles:
+        return
+
     # Write all files from the file table. This is useful for unreferenced files
     for table in db.enumerate_tables(files=True):
         filedir = "Files-" + table.abbrev
@@ -95,6 +98,7 @@ def main():
     parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat")
     parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroIndex.dat+CroBank.dat")
     parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
+    parser.add_argument("--nofiles", "-F", action="store_true", help="don't export files with .csv export")
     parser.add_argument("dbdir", type=str)
     args = parser.parse_args()
 
@@ -105,25 +109,21 @@ def main():
         kod = crodump.koddecoder.new(list(unhex(args.kod)))
     elif args.nokod:
         kod = None
-    elif args.strucrack:
-        class Cls: pass
-        cargs = Cls()
-        cargs.dbdir = args.dbdir
-        cargs.sys = False
-        cargs.silent = True
-        cracked = strucrack(None, cargs)
-        if not cracked:
-            return
-        kod = crodump.koddecoder.new(cracked)
-    elif args.dbcrack:
+    elif args.strucrack or args.dbcrack:
         class Cls: pass
         cargs = Cls()
         cargs.dbdir = args.dbdir
         cargs.sys = False
         cargs.silent = True
-        cracked = dbcrack(None, cargs)
+        cargs.fix = []
+        cargs.color = False
+        cargs.width = 24
+        cargs.noninteractive = True
+        cracked = strucrack(None, cargs) if args.strucrack else dbcrack(None, cargs)
         if not cracked:
-            return
+            exit(
+            "Can't automatically crack the database password. Try using   crodump strucrack   and pass the database key (KOD) using --kod"
+            )
         kod = crodump.koddecoder.new(cracked)
     else:
         kod = crodump.koddecoder.new()

diff --git a/crodump/crodump.py b/crodump/crodump.py
@@ -1,10 +1,10 @@
 from .kodump import kod_hexdump
-from .hexdump import unhex, tohex
+from .koddecoder import INITIAL_KOD, match_with_mismatches
+from .hexdump import unhex, tohex, asambigoushex, asasc, aschr, as1251, ashex
 from .readers import ByteReader
 from .Database import Database
 from .Datamodel import TableDefinition
 
-
 def destruct_sys3_def(rd):
     # todo
     pass
@@ -103,6 +103,21 @@ def destruct(kod, args):
     elif args.type == 3:
         destruct_sys_definition(args, data)
 
+def color_code(c, confidence, force):
+    """Return c wrapped in an ANSI color escape selected by confidence."""
+    from sys import stdout
+    on_tty = hasattr(stdout, 'isatty') and stdout.isatty()
+    if not (force or on_tty):
+        return c  # no escapes unless stdout is a terminal or color is forced
+    if confidence < 0:
+        shade = "96"  # bright cyan
+    elif confidence == 0:
+        shade = "31"  # red
+    elif confidence == 255:
+        shade = "32"  # green
+    else:
+        shade = "93" if confidence > 3 else "94"  # bright yellow / bright blue
+    return "\033[" + shade + "m" + c + "\033[0m"
 
 def strucrack(kod, args):
     """
@@ -131,11 +146,138 @@ def strucrack(kod, args):
             xref[(ofs+i+1)%256][byte] += 1
 
     KOD = [0] * 256
+    KOD_CONFIDENCE = [0] * 256
     for i, xx in enumerate(xref):
         k, v = max(enumerate(xx), key=lambda kv: kv[1])
+
+#       Display the confidence, matches under 3 usually are unreliable
+#       print("%02x :: %02x :: %d" % (i, k, v))
         KOD[k] = i
+        KOD_CONFIDENCE[k] = v
+
+#       Test deducted KOD against the default one, for debugging purposes
+#        if KOD[k] != INITIAL_KOD[k]:
+#            print("# KOD[%02x] == %02x, should be %02x" % (i, KOD[i], INITIAL_KOD[i]))
+#            KOD[k] = -1
+
+    for fix in args.fix or []:
+        if len(fix) != 6:
+            print("Invalid Fix format. Use xxyy=C or xxyycc")
+            continue
+
+        if (fix[4] != "="):
+            i, o, c = unhex(fix)
+        else:
+            i, o = unhex(fix[0:4])
+            c, = as1251(fix[5:])
+
+        KOD[i] = (c + o) % 256
+        KOD_CONFIDENCE[i] = 255
+        # print("%02x %02x %02x" % ((c + o) % 256, i, o))
+
+    # For chunks of text where record and offset is known, set the KOD
+    for fix in args.text or []:
+        record, line, offset, text = fix.split(':', 4)
+        data = table.readrec(int(record)+1)
+        dataoff = int(line) + int(offset)
+        o = int(record) + 1 + int(line) + int(offset)
+        for i, c in enumerate(text):
+            d = data[dataoff + i]
+            KOD[d] = (int.from_bytes(as1251(c), "little") + o + i) % 256
+            KOD_CONFIDENCE[d] = 255
+
+    kod_set = set([v for o, v in enumerate(KOD) if KOD_CONFIDENCE[o] > 0])
+    unset_entries = [o for o, v in enumerate(KOD) if KOD_CONFIDENCE[o] == 0]
+    unused_values = [v for v in sorted(set(range(0,256)).difference(kod_set))]
+
+    # if there's only one mapping missing in KOD and only one value not used, we
+    # just assume those to belong together with a low confidence
+    if len(unset_entries) == 1 and len(unused_values) == 1:
+        entry = unset_entries[0]
+        KOD[entry] = unused_values[0]
+        KOD_CONFIDENCE[entry] = 1
+
+    # Show duplicates that may arise by the user forcing KOD entries from command line
+    kod_set = [v for o, v in enumerate(KOD) if KOD_CONFIDENCE[o] > 0]
+    duplicates = [(o, v) for o, v in enumerate(KOD) if kod_set.count(v) > 1 and KOD_CONFIDENCE[o] > 0]
+    duplicates = sorted(duplicates, key=lambda x: x[1])
+
+    for o, v in duplicates:
+        if KOD_CONFIDENCE[o] < 255:
+            KOD_CONFIDENCE[o] = -1
+
+    import crodump.koddecoder
+    kod = crodump.koddecoder.new(KOD, KOD_CONFIDENCE)
+
+    known_strings = [
+        (b'USERINFO', 4, b'\x08USERINFO', -1),
+        (b'Version', 4, b'\x07Version', -1),
+        (b'\x08BankName', 5, b'\x08BankName', 0),
+        (as1251("Системный номер"), 6, b'\x00\x00\x00\x00\x00\x00\x0f' + as1251("Системный номер") + b'\x01\x00\x00\x00\x00', -7)
+    ]
+
+    force_color = args.color
+
+    # Dump partially decoded stru records for the user to try to spot patterns
+    w = args.width
+    for i, data in enumerate(table.enumrecords()):
+        if not data: continue
+
+        print("Processing record number %d" % i )
+
+        candidate, candidate_confidence = kod.try_decode(i + 1, data)
+
+        for s, maxsubs, deststring, destoffset in known_strings:
+            incomplete_matches = match_with_mismatches(candidate, candidate_confidence, s, maxsubs)
+            # print(sisnm)
+            for ofix in incomplete_matches:
+                do = ofix[0]
+                print("Found %s which looks a lot like %s " % (asasc(candidate[do:do+len(s)]), asasc(s)) )
+                print("Add the following switches to your command line to fix the decoder box:\n    ", end='')
+                for o, c in enumerate(deststring):
+                    print("-f %02x%02x%02x " % (data[do + o + destoffset], (do + i + 1 + o + destoffset) % 256, c), end='')
+                print("\n")
+
+        candidate_chunks = [candidate[j:j+w] for j in range(0, len(candidate), w)]
+        for ofs, chunk in enumerate(candidate_chunks):
+            confidence = candidate_confidence[ofs * w:ofs * w + w]
+            text = asasc(chunk, confidence)
+            hexed = asambigoushex(chunk, confidence)
+
+            colored = "".join(color_code(c, confidence[o], force_color) for o, c in enumerate(text))
+            colored_hexed = "".join(color_code(c, confidence[o>>1], force_color) for o, c in enumerate(hexed))
+            fix_helper = " ".join("%02x%02x=%s" % (b, (w * ofs + i + 1 + o) % 256, color_code(text[o], confidence[o], force_color)) for o, b in enumerate(data[ofs * w:ofs * w + w]))
+
+
+            # Can't use left padding in format string, because we have color escape codes,
+            # so do manual padding
+            padding = " " * (w - len(chunk))
+
+            print ("%05d %s : %s : %s" % (w * ofs, colored + padding, colored_hexed + padding * 2, fix_helper))
+        print()
+
+    if len(duplicates):
+        print("\nDuplicates found:\n" + ", ".join(color_code("[%02x=>%02x (%d)]" % (o, v, KOD_CONFIDENCE[o]), KOD_CONFIDENCE[o], force_color) for o, v in duplicates))
+
+    # If the KOD is not completely resolved, show the missing mappings
+    unset_count = KOD_CONFIDENCE.count(0)
+    if unset_count > 0:
+        if args.noninteractive:
+            return
+        if not args.silent:
+            unset_entries = ", ".join(["%02x" % o for o, v in enumerate(KOD) if KOD_CONFIDENCE[o] == 0])
+            unused_values = ", ".join(["%02x" % v for v in sorted(set(range(0,256)).difference(set(kod_set)))])
+            print("\nAmbigous result when cracking. %d entries unsolved. Missing mappings:" % unset_count )
+            print("[%s] => [%s]\n" % (unset_entries, unused_values ))
+            print("KOD estimate:")
+            print("".join(color_code("%02x" % c if KOD_CONFIDENCE[o] > 0 else "??", KOD_CONFIDENCE[o], force_color) for o, c in enumerate(KOD) ))
+
+            print("\nIf you can provide clues for unresolved KOD entries by looking at the output, pass them via")
+            print("crodump strucrack -f f103=B  -f f10342")
+        return [0 if KOD_CONFIDENCE[o] == 0 else _ for o, _ in enumerate(KOD)]
 
     if not args.silent:
+        print("Use the following database key to decrypt the database with crodump or croconvert with the --kod option:")
         print(tohex(bytes(KOD)))
 
     return KOD
@@ -227,7 +369,7 @@ def main():
     p.add_argument("--find1d", action="store_true", help="Find records with 0x1d in it")
     p.add_argument("--stats", action="store_true", help="calc table stats from the first byte of each record",)
     p.add_argument("--index", action="store_true", help="dump CroIndex")
-    p.add_argument("--stru", action="store_true", help="dump CroIndex")
+    p.add_argument("--stru", action="store_true", help="dump CroStru")
     p.add_argument("--bank", action="store_true", help="dump CroBank")
     p.add_argument("--sys", action="store_true", help="dump CroSys")
     p.add_argument("dbdir", type=str)
@@ -248,6 +390,12 @@ def main():
     p = subparsers.add_parser("strucrack", help="Crack v4 KOD encrypion, bypassing the need for the database password.")
     p.add_argument("--sys", action="store_true", help="Use CroSys for cracking")
     p.add_argument("--silent", action="store_true", help="no output")
+    p.add_argument("--noninteractive", action="store_true", help="Stop if automatic cracking fails")
+    p.add_argument("--color", action="store_true", help="force color output even on non-ttys")
+    p.add_argument("--fix", "-f", action="append", dest="fix", help="force KOD entries after identification")
+    p.add_argument("--text", "-t", action="append", dest="text", help="add fixed bytes to decoder box by providing whole strings for a position in a record, format is record:line:offset:plaintext")
+    p.add_argument("--width", "-w", type=int, help="max number of decoded characters on screen", default=24)
+
     p.add_argument("dbdir", type=str)
     p.set_defaults(handler=strucrack)
 
@@ -271,6 +419,11 @@ class Cls: pass
         cargs.dbdir = args.dbdir
         cargs.sys = False
         cargs.silent = True
+        cargs.noninteractive = False
+        # add all keys we forgot to add
+        for k, v in args.__dict__.items():
+            if not cargs.__dict__.get(k):
+                cargs.__dict__.update({k: v})
         cracked = strucrack(None, cargs)
         if not cracked:
             return
@@ -281,6 +434,7 @@ class Cls: pass
         cargs.dbdir = args.dbdir
         cargs.sys = False
         cargs.silent = True
+        cargs.noninteractive = False
         cracked = dbcrack(None, cargs)
         if not cracked:
             return

diff --git a/crodump/hexdump.py b/crodump/hexdump.py
@@ -22,6 +22,23 @@ def ashex(line):
     """
     return " ".join("%02x" % _ for _ in line)
 
+def asambigoushex(line, confidence):
+    """
+    hex-encode line without separators; values whose confidence is not positive become "??"
+    """
+    return "".join("??" if confidence[pos] <= 0 else "%02x" % val for pos, val in enumerate(line))
+
+def as1251(b):
+    """
+    Encode the text b as CP-1251 bytes; b is passed through str() first.
+    This will help parse cyrillic user entries from command line.
+    Returns b"." when b cannot be represented in CP-1251.
+    """
+    try:
+        return str(b).encode("cp1251")
+    except UnicodeEncodeError:
+        # bytes(".") raises TypeError on Python 3, so the fallback is a literal
+        return b"."
 
 def aschr(b):
     """
@@ -41,12 +58,14 @@ def aschr(b):
     return "."
 
 
-def asasc(line):
+def asasc(line, confidence=None):
     """
     convert a CP-1251 encoded byte-array to a line of unicode characters.
     """
-    return "".join(aschr(_) for _ in line)
-
+    if confidence is None:  # no per-byte confidence given: render every byte
+        return "".join(aschr(_) for _ in line)
+    else:  # bytes whose confidence is not positive are rendered as "?"
+        return "".join(aschr(_) if confidence[o] > 0 else "?" for o, _ in enumerate(line))
 
 def hexdump(ofs, data, args):
     """

diff --git a/crodump/koddecoder.py b/crodump/koddecoder.py
@@ -26,13 +26,15 @@ class KODcoding:
     class handing KOD encoding and decoding, optionally
     with a user specified KOD table.
     """
-    def __init__(self, initial=INITIAL_KOD):
+    def __init__(self, initial=INITIAL_KOD, confidence=None):
         self.kod = [_ for _ in initial]
+        self.confidence = [255] * 256 if confidence is None else confidence  # fresh list per instance, never a shared default
 
         # calculate the inverse table.
         self.inv = [0 for _ in initial]
         for i, x in enumerate(self.kod):
-            self.inv[x] = i
+            if self.confidence[i]:  # entries with confidence 0 get no inverse mapping
+                self.inv[x] = i
 
     def decode(self, o, data):
         """
@@ -41,6 +43,16 @@ def decode(self, o, data):
         """
         return bytes((self.kod[b] - i - o) % 256 for i, b in enumerate(data))
 
+    def try_decode(self, o, data):
+        """
+        partial decode : shift, a[0]..a[n-1] -> (b, conf), two lists of length n
+            b[i] = (KOD[a[i]] - (i+shift)) % 256, or 0 where confidence[a[i]] == 0
+            conf[i] = confidence[a[i]]
+        """
+        conf = [self.confidence[byte] for byte in data]
+        plain = [(self.kod[byte] - pos - o) % 256 if level != 0 else 0 for pos, (byte, level) in enumerate(zip(data, conf))]
+        return (plain, conf)
+
     def encode(self, o, data):
         """
         encode : shift, b[0]..b[n-1] -> a[0]..a[n-1]
@@ -55,5 +67,27 @@ def new(*args):
     """
     return KODcoding(*args)
 
+def match_with_mismatches(data, confidence, string, maxsubs=None):
+    """
+    find all near-occurrences of string in data: offsets where at least maxsubs
+    but not all positions match; returns (offset, matching) sorted by matching
+    """
+
+    # maxsubs is the required number of matching positions; by default all but two, never fewer than two
+    maxsubs = maxsubs if maxsubs is not None else max( 2, len(string) - 2)
+
+    # if string cant fit into data, return no matches
+    if len(string) > len(data):
+        return []
+
+    matches = []
+    for offs in range(0, len(data) - len(string) + 1):  # + 1: include the last offset where string still fits
+        # only positions decoded with a positive confidence count as matching
+        matching = sum(
+            1 for o, c in enumerate(string) if data[offs + o] == c and confidence[offs + o] > 0
+        )
 
+        if matching != len(string) and matching >= maxsubs:
+            matches.append((offs, matching))
 
+    return sorted(matches, key=lambda x: x[1])