Skip to content

Commit 81bdc6e

Browse files
Update report
1 parent b8dad8d commit 81bdc6e

File tree

12 files changed

+5992
-614
lines changed

12 files changed

+5992
-614
lines changed

data/jtr/dumb16.conf

Lines changed: 42 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
# This software is Copyright (c) 2012-2020 magnum, and it is hereby
1+
# This software is Copyright (c) 2012-2024 magnum, and it is hereby
22
# released to the general public under the following terms:
33
# Redistribution and use in source and binary forms, with or without
44
# modification, are permitted.
55
#
66
# Generic implementation of "dumb" exhaustive search of Unicode BMP.
7-
# Default is to try *all* allocated characters in the BMP of Unicode v13
8-
# (there's 55,387 of them). Even if a fast format can exhaust two characters
7+
# Default is to try *all* allocated characters in the BMP of Unicode v16
8+
# (there are 55,537 of them). Even if a fast format can exhaust two characters
99
# in 15 minutes, three characters would take 1.5 years...
1010
#
1111
# Note that these modes will handle --max-len differently than normal: They
@@ -22,7 +22,7 @@ int maxlength; // Maximum password length to try
2222
int last; // Last character position, zero-based
2323
int lastid; // Character index in the last position
2424
int id[0x7f]; // Current character indices for other positions
25-
int charset[0x10000], c0; // Characters
25+
int charset[0xd900], c0; // Characters
2626

2727
void init()
2828
{
@@ -43,7 +43,7 @@ void init()
4343

4444
/*
4545
* This defines the character set. This is auto-generated from UnicodeData.txt
46-
* and we skip control characters.
46+
* of Unicode 16.0.0 and we skip control characters.
4747
*/
4848
i = 0;
4949
// 0000..007F; Basic Latin
@@ -119,9 +119,6 @@ void init()
119119
charset[i++] = c++;
120120
// 0600..06FF; Arabic
121121
c = 0x600; // from ARABIC NUMBER SIGN
122-
while (c <= 0x61c) // ..to ARABIC LETTER MARK
123-
charset[i++] = c++;
124-
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
125122
while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V
126123
charset[i++] = c++;
127124
// 0700..074F; Syriac
@@ -163,14 +160,17 @@ void init()
163160
c = 0x860; // from SYRIAC LETTER MALAYALAM NGA
164161
while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA
165162
charset[i++] = c++;
166-
// 08A0..08FF; Arabic Extended-A
167-
c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW
168-
while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW
163+
// 0870..089F; Arabic Extended-B
164+
c = 0x870; // from ARABIC LETTER ALEF WITH ATTACHED FATHA
165+
while (c <= 0x88e) // ..to ARABIC VERTICAL TAIL
169166
charset[i++] = c++;
170-
c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE
171-
while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
167+
charset[i++] = 0x890; // ARABIC POUND MARK ABOVE
168+
charset[i++] = 0x891; // ARABIC PIASTRE MARK ABOVE
169+
c = 0x897; // from ARABIC PEPET
170+
while (c <= 0x89f) // ..to ARABIC HALF MADDA OVER MADDA
172171
charset[i++] = c++;
173-
c = 0x8d3; // from ARABIC SMALL LOW WAW
172+
// 08A0..08FF; Arabic Extended-A
173+
c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW
174174
while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA
175175
charset[i++] = c++;
176176
// 0900..097F; Devanagari
@@ -360,7 +360,7 @@ void init()
360360
c = 0xc2a; // from TELUGU LETTER PA
361361
while (c <= 0xc39) // ..to TELUGU LETTER HA
362362
charset[i++] = c++;
363-
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
363+
c = 0xc3c; // from TELUGU SIGN NUKTA
364364
while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR
365365
charset[i++] = c++;
366366
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
@@ -406,14 +406,16 @@ void init()
406406
charset[i++] = c++;
407407
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
408408
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
409+
charset[i++] = 0xcdd; // KANNADA LETTER NAKAARA POLLU
410+
charset[i++] = 0xcde; // KANNADA LETTER FA
409411
c = 0xce0; // from KANNADA LETTER VOCALIC RR
410412
while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL
411413
charset[i++] = c++;
412414
c = 0xce6; // from KANNADA DIGIT ZERO
413415
while (c <= 0xcef) // ..to KANNADA DIGIT NINE
414416
charset[i++] = c++;
415417
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
416-
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
418+
charset[i++] = 0xcf3; // KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
417419
// 0D00..0D7F; Malayalam
418420
c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE
419421
while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L
@@ -483,7 +485,7 @@ void init()
483485
while (c <= 0xec4) // ..to LAO VOWEL SIGN AI
484486
charset[i++] = c++;
485487
c = 0xec8; // from LAO TONE MAI EK
486-
while (c <= 0xecd) // ..to LAO NIGGAHITA
488+
while (c <= 0xece) // ..to LAO YAMAKKAN
487489
charset[i++] = c++;
488490
c = 0xed0; // from LAO DIGIT ZERO
489491
while (c <= 0xed9) // ..to LAO DIGIT NINE
@@ -596,11 +598,9 @@ void init()
596598
charset[i++] = c++;
597599
// 1700..171F; Tagalog
598600
c = 0x1700; // from TAGALOG LETTER A
599-
while (c <= 0x170c) // ..to TAGALOG LETTER YA
600-
charset[i++] = c++;
601-
c = 0x170e; // from TAGALOG LETTER LA
602-
while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA
601+
while (c <= 0x1715) // ..to TAGALOG SIGN PAMUDPOD
603602
charset[i++] = c++;
603+
charset[i++] = 0x171f; // TAGALOG LETTER ARCHAIC RA
604604
// 1720..173F; Hanunoo
605605
c = 0x1720; // from HANUNOO LETTER A
606606
while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION
@@ -629,9 +629,6 @@ void init()
629629
charset[i++] = c++;
630630
// 1800..18AF; Mongolian
631631
c = 0x1800; // from MONGOLIAN BIRGA
632-
while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR
633-
charset[i++] = c++;
634-
c = 0x1810; // from MONGOLIAN DIGIT ZERO
635632
while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE
636633
charset[i++] = c++;
637634
c = 0x1820; // from MONGOLIAN LETTER A
@@ -704,14 +701,14 @@ void init()
704701
charset[i++] = c++;
705702
// 1AB0..1AFF; Combining Diacritical Marks Extended
706703
c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT
707-
while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW
704+
while (c <= 0x1ace) // ..to COMBINING LATIN SMALL LETTER INSULAR T
708705
charset[i++] = c++;
709706
// 1B00..1B7F; Balinese
710707
c = 0x1b00; // from BALINESE SIGN ULU RICEM
711-
while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK
708+
while (c <= 0x1b4c) // ..to BALINESE LETTER ARCHAIC JNYA
712709
charset[i++] = c++;
713-
c = 0x1b50; // from BALINESE DIGIT ZERO
714-
while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
710+
c = 0x1b4e; // from BALINESE INVERTED CARIK SIKI
711+
while (c <= 0x1b7f) // ..to BALINESE PANTI BAWAK
715712
charset[i++] = c++;
716713
// 1B80..1BBF; Sundanese
717714
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
@@ -739,7 +736,7 @@ void init()
739736
charset[i++] = c++;
740737
// 1C80..1C8F; Cyrillic Extended-C
741738
c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE
742-
while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK
739+
while (c <= 0x1c8a) // ..to CYRILLIC SMALL LETTER TJE
743740
charset[i++] = c++;
744741
// 1C90..1CBF; Georgian Extended
745742
c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN
@@ -765,9 +762,6 @@ void init()
765762
charset[i++] = c++;
766763
// 1DC0..1DFF; Combining Diacritical Marks Supplement
767764
c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT
768-
while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW
769-
charset[i++] = c++;
770-
c = 0x1dfb; // from COMBINING DELETION MARK
771765
while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
772766
charset[i++] = c++;
773767
// 1E00..1EFF; Latin Extended Additional
@@ -831,7 +825,7 @@ void init()
831825
charset[i++] = c++;
832826
// 20A0..20CF; Currency Symbols
833827
c = 0x20a0; // from EURO-CURRENCY SIGN
834-
while (c <= 0x20bf) // ..to BITCOIN SIGN
828+
while (c <= 0x20c0) // ..to SOM SIGN
835829
charset[i++] = c++;
836830
// 20D0..20FF; Combining Diacritical Marks for Symbols
837831
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
@@ -859,7 +853,7 @@ void init()
859853
charset[i++] = c++;
860854
// 2400..243F; Control Pictures
861855
c = 0x2400; // from SYMBOL FOR NULL
862-
while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
856+
while (c <= 0x2429) // ..to SYMBOL FOR DELETE MEDIUM SHADE FORM
863857
charset[i++] = c++;
864858
// 2440..245F; Optical Character Recognition
865859
c = 0x2440; // from OCR HOOK
@@ -925,10 +919,7 @@ void init()
925919
charset[i++] = c++;
926920
// 2C00..2C5F; Glagolitic
927921
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
928-
while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
929-
charset[i++] = c++;
930-
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
931-
while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
922+
while (c <= 0x2c5f) // ..to GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
932923
charset[i++] = c++;
933924
// 2C60..2C7F; Latin Extended-C
934925
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
@@ -989,7 +980,7 @@ void init()
989980
charset[i++] = c++;
990981
// 2E00..2E7F; Supplemental Punctuation
991982
c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER
992-
while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET
983+
while (c <= 0x2e5d) // ..to OBLIQUE HYPHEN
993984
charset[i++] = c++;
994985
// 2E80..2EFF; CJK Radicals Supplement
995986
c = 0x2e80; // from CJK RADICAL REPEAT
@@ -1004,7 +995,7 @@ void init()
1004995
charset[i++] = c++;
1005996
// 2FF0..2FFF; Ideographic Description Characters
1006997
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
1007-
while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
998+
while (c <= 0x2fff) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
1008999
charset[i++] = c++;
10091000
// 3000..303F; CJK Symbols and Punctuation
10101001
c = 0x3000; // from IDEOGRAPHIC SPACE
@@ -1039,8 +1030,9 @@ void init()
10391030
charset[i++] = c++;
10401031
// 31C0..31EF; CJK Strokes
10411032
c = 0x31c0; // from CJK STROKE T
1042-
while (c <= 0x31e3) // ..to CJK STROKE Q
1033+
while (c <= 0x31e5) // ..to CJK STROKE SZP
10431034
charset[i++] = c++;
1035+
charset[i++] = 0x31ef; // IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
10441036
// 31F0..31FF; Katakana Phonetic Extensions
10451037
c = 0x31f0; // from KATAKANA LETTER SMALL KU
10461038
while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO
@@ -1066,7 +1058,7 @@ void init()
10661058
charset[i++] = c++;
10671059
// 4E00..9FFF; CJK Unified Ideographs
10681060
c = 0x4e00; // from <CJK Ideograph, First>
1069-
while (c <= 0x9ffc) // ..to <CJK Ideograph, Last>
1061+
while (c <= 0x9fff) // ..to <CJK Ideograph, Last>
10701062
charset[i++] = c++;
10711063
// A000..A48F; Yi Syllables
10721064
c = 0xa000; // from YI SYLLABLE IT
@@ -1098,12 +1090,14 @@ void init()
10981090
charset[i++] = c++;
10991091
// A720..A7FF; Latin Extended-D
11001092
c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE
1101-
while (c <= 0xa7bf) // ..to LATIN SMALL LETTER GLOTTAL U
1093+
while (c <= 0xa7cd) // ..to LATIN SMALL LETTER S WITH DIAGONAL STROKE
11021094
charset[i++] = c++;
1103-
c = 0xa7c2; // from LATIN CAPITAL LETTER ANGLICANA W
1104-
while (c <= 0xa7ca) // ..to LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
1095+
charset[i++] = 0xa7d0; // LATIN CAPITAL LETTER CLOSED INSULAR G
1096+
charset[i++] = 0xa7d1; // LATIN SMALL LETTER CLOSED INSULAR G
1097+
c = 0xa7d5; // from LATIN SMALL LETTER DOUBLE WYNN
1098+
while (c <= 0xa7dc) // ..to LATIN CAPITAL LETTER LAMBDA WITH STROKE
11051099
charset[i++] = c++;
1106-
c = 0xa7f5; // from LATIN CAPITAL LETTER REVERSED HALF H
1100+
c = 0xa7f2; // from MODIFIER LETTER CAPITAL C
11071101
while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M
11081102
charset[i++] = c++;
11091103
// A800..A82F; Syloti Nagri
@@ -1258,19 +1252,16 @@ void init()
12581252
charset[i++] = c++;
12591253
// FB50..FDFF; Arabic Presentation Forms-A
12601254
c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM
1261-
while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW
1255+
while (c <= 0xfbc2) // ..to ARABIC SYMBOL WASLA ABOVE
12621256
charset[i++] = c++;
12631257
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
1264-
while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS
1265-
charset[i++] = c++;
1266-
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
12671258
while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
12681259
charset[i++] = c++;
12691260
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
12701261
while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
12711262
charset[i++] = c++;
12721263
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
1273-
while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
1264+
while (c <= 0xfdff) // ..to ARABIC LIGATURE AZZA WA JALL
12741265
charset[i++] = c++;
12751266
// FE00..FE0F; Variation Selectors
12761267
c = 0xfe00; // from VARIATION SELECTOR-1

0 commit comments

Comments
 (0)