Skip to content

Commit d8c6d02

Browse files
committed
made more comments
1 parent 59d3740 commit d8c6d02

File tree

1 file changed

+20
-15
lines changed

1 file changed

+20
-15
lines changed

ciphers/break_vigenere.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,22 @@
66
}
77
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
88
PARAMETER = 0.0665 # index of coincidence of the entire language (for english 0.0665)
9-
MAX_KEYLENGTH = 10 # None is the default
9+
MAX_KEYLENGTH = None # None is the default, you can also try a positive integer (example: 10)
1010

1111

1212
def index_of_coincidence(frequencies: dict, length: int) -> float:
    """
    Calculates the index of coincidence for a text.

    The index of coincidence is the probability that two letters drawn at random from the text (without
    replacement) are the same: sum(n * (n - 1)) / (length * (length - 1)), where n is the count of each letter.

    :param frequencies: dictionary of the form {letter_of_the_alphabet: number of times it appears in the text}
        (raw counts, not percentages)
    :param length: the length of the text (the sum of all the counts)
    :return: the index of coincidence, or 0.0 when the text has fewer than two letters (no pair can be drawn)
    >>> index_of_coincidence({'A':1,'D':2,'E':3,'F':1,'H':1,'L': 2,'N':1,'T':1,'W':1}, 13)
    0.0641025641025641
    >>> index_of_coincidence({'A': 1}, 1)
    0.0
    """
    # With fewer than two letters the denominator length * (length - 1) is zero.
    if length < 2:
        return 0.0

    matching_pairs = sum(count * (count - 1) for count in frequencies.values())
    return matching_pairs / (length * (length - 1))
2325

2426

2527
def calculate_indexes_of_coincidence(ciphertext: str, step: int) -> list:
@@ -44,15 +46,18 @@ def calculate_indexes_of_coincidence(ciphertext: str, step: int) -> list:
4446
frequencies[ciphertext[i]] += 1
4547
except KeyError:
4648
frequencies[ciphertext[i]] = 1
47-
indexes_of_coincidence.append(index_of_coincidence(frequencies, c))
49+
if c > 1: # to avoid division by zero in the index_of_coincidence function
50+
indexes_of_coincidence.append(index_of_coincidence(frequencies, c))
4851

4952
return indexes_of_coincidence
5053

5154

5255
def friedman_method(ciphertext: str, max_keylength: int=None) -> int:
5356
"""
5457
Implements Friedman's method for finding the length of the key of a Vigenere cipher. It finds the length with an
55-
index of confidence closer to that of an average text in the english language.
58+
index of coincidence closest to that of an average text in the English language. Check the Wikipedia page:
59+
https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher
60+
The algorithm is in the book "Introduction to Cryptography", K. Draziotis https://repository.kallipos.gr/handle/11419/8183
5661
:param ciphertext: a string (text)
5762
:param max_keylength: the maximum length of key that Friedman's method should check, if None then it defaults to the
5863
length of the cipher
@@ -84,7 +89,7 @@ def friedman_method(ciphertext: str, max_keylength: int=None) -> int:
8489

8590

8691
def get_frequencies() -> tuple:
    """
    Return the values of the global variable @LETTER_FREQUENCIES_DICT for the letters 'A' to 'Z' (in alphabetical
    order), each divided by 100, as a tuple.
    """
    first_code = ord('A')
    return tuple(
        LETTER_FREQUENCIES_DICT[chr(code)] / 100
        for code in range(first_code, first_code + 26)
    )
9095

@@ -97,7 +102,7 @@ def find_key(ciphertext: str, key_length: int) -> str:
97102
multiplies its frequency with the average one and adds them all together, then it shifts the frequencies of the text
98103
cyclically by one position and repeats the process. The shift that produces the largest sum corresponds to a letter
99104
of the key. The whole procedure takes place for every letter of the key (essentially as many times as the length
100-
of the key).
105+
of the key). See here: https://www.youtube.com/watch?v=LaWp_Kq0cKs
101106
:param ciphertext: a string (text)
102107
:param key_length: a supposed length of the key
103108
:return: the key as a string
@@ -133,27 +138,27 @@ def find_key(ciphertext: str, key_length: int) -> str:
133138

134139

135140
def find_key_from_vigenere_cipher(ciphertext: str) -> str:
    """
    Tries to find the key length and then the actual key of a Vigenere ciphertext. It uses Friedman's method and
    statistical analysis. It works best for large pieces of text written in the english language.
    :param ciphertext: the text to analyse; it is upper-cased and every symbol that is not in LETTERS is dropped
    :return: the key as a string
    :raises ValueError: if the key length estimated by friedman_method is not strictly positive
    """
    # Keep only the letters of the alphabet; spaces, digits and punctuation carry no key information.
    clean_ciphertext = "".join(symbol for symbol in ciphertext.upper() if symbol in LETTERS)

    key_length = friedman_method(clean_ciphertext, max_keylength=MAX_KEYLENGTH)
    print(f"The length of the key is {key_length}")
    if key_length <= 0:
        raise ValueError("The length of the key should be strictly positive")

    return find_key(clean_ciphertext, key_length)
152159

153160

154161
if __name__ == '__main__':
    # Placeholder input: put the ciphertext to analyse between the quotes.
    ciphertext = ""
    print(find_key_from_vigenere_cipher(ciphertext))

0 commit comments

Comments
 (0)