6
6
}
7
7
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
8
8
PARAMETER = 0.0665  # index of coincidence of the language as a whole (0.0665 for English)
9
+ MAX_KEYLENGTH = 10  # passed as max_keylength to friedman_method (that parameter defaults to None)
9
10
10
11
11
12
def index_of_coincidence (frequencies : dict , length : int ) -> float :
@@ -82,17 +83,77 @@ def friedman_method(ciphertext: str, max_keylength: int=None) -> int:
82
83
return li [1 ]
83
84
84
85
86
def get_frequencies() -> tuple:
    """
    Return the reference letter frequencies as fractions, ordered from 'A' to 'Z'.

    Each value is read from the module-level LETTER_FREQUENCIES_DICT (keyed by
    uppercase letter) and divided by 100.

    :return: a tuple of 26 numbers, position 0 for 'A', position 25 for 'Z'
    """
    first_letter = ord('A')
    return tuple(
        LETTER_FREQUENCIES_DICT[chr(first_letter + offset)] / 100
        for offset in range(26)
    )
90
+
91
+
92
def find_key(ciphertext: str, key_length: int) -> str:
    """
    Recover the key of a Vigenere-encrypted text using statistical analysis.

    The text is split into ``key_length`` columns (every ``key_length``-th
    character, starting at offsets 0, 1, ...). For each column the letter
    frequencies are computed and, for every cyclic shift of those frequencies,
    multiplied position by position with the reference frequencies returned by
    ``get_frequencies()`` and summed. The shift that produces the largest sum
    is the key letter for that column; on ties the smallest shift wins.

    Characters outside ``A``-``Z`` are ignored when counting. A column that
    contains no letters gives no evidence and yields ``'A'`` (shift 0).

    :param ciphertext: the encrypted text, expected to be uppercase A-Z only
    :param key_length: a supposed length of the key
    :return: the key as a string of ``key_length`` uppercase letters, or an
             empty string when ``key_length`` is not positive
    """
    a = ord('A')
    alphabet_length = 26  # the length of the english alphabet

    if key_length <= 0:
        return ""
    if not ciphertext:
        # Nothing to analyse: every column is empty, so every shift is 0.
        return "A" * key_length

    # The reference frequencies do not change between columns; fetch them once.
    real_freq = get_frequencies()

    key = []
    for k in range(key_length):
        # Count the letters of the k-th column; counts[0] is 'A' and so on.
        counts = [0] * alphabet_length
        for symbol in ciphertext[k::key_length]:
            index = ord(symbol) - a
            # Guard the index: a negative value would silently wrap around
            # and be counted as a different letter.
            if 0 <= index < alphabet_length:
                counts[index] += 1

        total = sum(counts)
        if total == 0:
            # Empty column (key longer than the text, or no letters in it).
            key.append(0)
            continue
        freq = [count / total for count in counts]

        # Shifting the column frequencies left by `shift` positions is the
        # same as reading them at (j + shift) modulo the alphabet length.
        # max() returns the first maximal item, so ties go to the smallest shift.
        best_shift = max(
            range(alphabet_length),
            key=lambda shift: sum(
                freq[(j + shift) % alphabet_length] * real_freq[j]
                for j in range(alphabet_length)
            ),
        )
        key.append(best_shift)

    return "".join(chr(num + a) for num in key)  # return the key as a string
133
+
134
+
85
135
def find_key_from_vigenere_cipher(ciphertext: str) -> str:
    """
    Estimate the key of a Vigenere ciphertext.

    The text is uppercased and reduced to the characters found in LETTERS, the
    key length is estimated with ``friedman_method`` (bounded by
    MAX_KEYLENGTH), and the key itself is then computed by ``find_key``.
    The cleaned text and the estimated key length are printed along the way.

    :param ciphertext: the raw encrypted text; any characters are accepted
    :return: the key as a string, or an empty string when the estimated key
             length is not positive
    """
    clean_ciphertext = "".join(
        letter for letter in ciphertext.upper() if letter in LETTERS
    )
    print(clean_ciphertext)

    key_length = friedman_method(clean_ciphertext, max_keylength=MAX_KEYLENGTH)
    print(f"The length of the key is {key_length} ")
    if key_length <= 0:
        print("Something went wrong while calculating the length of the key.")
        return ""

    return find_key(clean_ciphertext, key_length)
95
152
96
153
97
154
if __name__ == '__main__':
    # Script entry point: read the ciphertext from out.txt and print the key
    # recovered from it.
    with open("out.txt") as source:
        encrypted_text = source.read()
        recovered_key = find_key_from_vigenere_cipher(encrypted_text)
        print(recovered_key)
0 commit comments