Skip to content

Commit 59d3740

Browse files
committed
made find_key and get_frequencies
1 parent e120ae4 commit 59d3740

File tree

1 file changed

+65
-4
lines changed

1 file changed

+65
-4
lines changed

ciphers/break_vigenere.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
}
77
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
88
PARAMETER = 0.0665 # index of coincidence of the entire language (for English 0.0665)
9+
MAX_KEYLENGTH = 10 # None is the default
910

1011

1112
def index_of_coincidence(frequencies: dict, length: int) -> float:
@@ -82,17 +83,77 @@ def friedman_method(ciphertext: str, max_keylength: int=None) -> int:
8283
return li[1]
8384

8485

86+
def get_frequencies() -> tuple:
    """
    Return the letter frequencies stored in the global LETTER_FREQUENCIES_DICT as a tuple.

    The tuple has 26 entries ordered 'A' to 'Z'; every stored value is divided by 100
    (presumably converting percentages to fractions -- confirm against the dict's definition).
    """
    first_letter = ord('A')
    return tuple(
        LETTER_FREQUENCIES_DICT[chr(first_letter + offset)] / 100
        for offset in range(26)
    )
90+
91+
92+
def find_key(ciphertext: str, key_length: int) -> str:
    """
    Recover the key of a Vigenere-encrypted text by statistical analysis.

    The ciphertext is split into ``key_length`` columns (letters k, k + key_length, ...). For every column the
    relative letter frequencies are computed and compared with the reference frequencies returned by
    ``get_frequencies()``: matching entries are multiplied and summed, then the column frequencies are shifted
    cyclically one position to the left and the sum is recomputed, for all 26 shifts. The shift that produces the
    largest sum is taken as the corresponding letter of the key.

    :param ciphertext: the text to analyse; it must contain only the upper-case letters A-Z
    :param key_length: a supposed length of the key
    :return: the key as an upper-case string; "" when ``key_length`` is not positive. A key position whose column
        holds no ciphertext letter (``key_length`` larger than the text) cannot be determined and is reported as "A"
    :raises ValueError: if ``ciphertext`` contains a character outside A-Z
    """
    a = ord('A')
    alphabet_length = 26  # the length of the english alphabet

    if key_length <= 0:
        return ""
    if not ciphertext:
        # nothing to analyse: every key position is undetermined
        return "A" * key_length

    # Validate once up front. Without this check a character below 'A' would yield a negative index and be
    # silently counted as some other letter, while anything above 'Z' would fail with an opaque IndexError.
    offsets = [ord(symbol) - a for symbol in ciphertext]
    if any(not 0 <= offset < alphabet_length for offset in offsets):
        raise ValueError("ciphertext must contain only the upper-case letters A-Z")

    real_freq = get_frequencies()  # identical for every key letter, so fetched only once

    key = []
    for k in range(key_length):
        column = offsets[k::key_length]
        if not column:
            # no letters were encrypted with this key position; avoid dividing by zero
            key.append(0)
            continue

        # relative frequencies of the column: 'A' is in the first position and so on
        freq = [0] * alphabet_length
        for offset in column:
            freq[offset] += 1
        freq = [count / len(column) for count in freq]

        # find the shift with the largest sum -> one letter of the key;
        # freq[(j + shift) % 26] is the j-th entry of freq rotated `shift` positions to the left
        best_sum, best_shift = -1, 0
        for shift in range(alphabet_length):
            new_val = sum(
                freq[(j + shift) % alphabet_length] * real_freq[j] for j in range(alphabet_length)
            )
            if best_sum < new_val:
                best_sum, best_shift = new_val, shift
        key.append(best_shift)

    return "".join(chr(num + a) for num in key)  # return the key as a string
133+
134+
85135
def find_key_from_vigenere_cipher(ciphertext: str) -> str:
    """
    Estimate the key of a Vigenere-encrypted text.

    The text is upper-cased and every character that is not in LETTERS is dropped. The key length is then
    estimated with ``friedman_method`` (limited by MAX_KEYLENGTH) and the key itself is recovered with
    ``find_key``. The cleaned text and the estimated key length are printed along the way.

    :param ciphertext: the encrypted text; it may contain characters other than letters
    :return: the recovered key, or "" when the estimated key length is not positive
    """
    clean_ciphertext = "".join(symbol for symbol in ciphertext.upper() if symbol in LETTERS)
    print(clean_ciphertext)

    key_length = friedman_method(clean_ciphertext, max_keylength=MAX_KEYLENGTH)
    print(f"The length of the key is {key_length}")
    if key_length > 0:
        return find_key(clean_ciphertext, key_length)

    print("Something went wrong while calculating the length of the key.")
    return ""
95152

96153

97154
if __name__ == '__main__':
    # Read the ciphertext from out.txt, recover its Vigenere key and print it.
    with open("out.txt") as cipher_file:
        encrypted_text = cipher_file.read()
    recovered_key = find_key_from_vigenere_cipher(encrypted_text)
    print(recovered_key)

0 commit comments

Comments
 (0)