-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfrequencyanalyzer.py
More file actions
83 lines (67 loc) · 2.18 KB
/
frequencyanalyzer.py
File metadata and controls
83 lines (67 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import sys
from collections import Counter
from operator import itemgetter
# Letters that are counted; every other character of the input is ignored.
ALPHABET = frozenset(u"ABCDEFGHIJKLMNÑOPQRSTUVWXYZ")

# Reference letter orderings, keyed by the language name accepted as the
# second command-line argument.
# NOTE(review): both Spanish orderings used to contain "A" twice and no "H";
# the second "A" was replaced by "H" so that every letter appears exactly
# once. Confirm the position of "H" against the frequency table these
# strings were taken from.
LANGUAGE_FREQUENCIES = {
    "spanish": u"EAORSNITUCMDLPGQBYVHZFJXKW",
    "spanyish": u"EAORSNITUCMDLPGQBYVHZFJÑXKW",
    "english": u"ETAOINSHRDLCUMWFGYPBVKJXQZ",
}

BANNER = """
##############################################################
# Alvaro Reyes - Frequency analyzer #
##############################################################
# python frequencyanalyzer.py archivo language #
# Language can be spanish, spanyish or english #
##############################################################
"""


def count_letters(lines):
    """Count how often each letter of ALPHABET occurs in *lines*.

    *lines* is any iterable of text strings (for example an open text file).
    Each line is upper-cased before counting, so the result is
    case-insensitive. Returns a mapping from upper-case letter to its number
    of occurrences; letters that never occur are absent.
    """
    return Counter(
        char
        for line in lines
        for char in line.upper()
        if char in ALPHABET
    )


def frequency_order(counts):
    """Return the letters of *counts* joined into one string, most frequent first.

    *counts* maps each letter to its number of occurrences. Letters with the
    same count are placed in reverse alphabetical order, which is the order
    the script has always produced.
    """
    ranked = sorted(counts.items(), key=itemgetter(1, 0), reverse=True)
    return u"".join(letter for letter, _ in ranked)


def language_frequency(name):
    """Return the reference ordering registered for *name*, or None if unknown."""
    return LANGUAGE_FREQUENCIES.get(name)


def main(argv=None):
    """Run the analyzer for the given argument list (defaults to sys.argv).

    argv[1] is the path of the text to analyze and argv[2], when exactly
    three arguments are given, is the language to compare against. The
    letter ordering of the text is printed, then written to "output.txt" in
    the current directory, followed on a second line by the ordering of the
    chosen language (empty when none was given or it is not registered).
    Every early exit uses sys.exit() without a code, so the exit status is 0.
    """
    argv = sys.argv if argv is None else argv
    print(BANNER)

    if len(argv) < 2:
        print("Howto: enter file to be analyzed and optionally the language you want to compare ")
        print("It returns a two-lined file with the frequency of the text and the frequency of the language")
        sys.exit()

    # The input is decoded as UTF-8 so that a multi-byte letter such as the
    # Spanish N-with-tilde is seen as one character instead of separate bytes.
    try:
        text_file = io.open(argv[1], "r", encoding="utf-8")
    except (IOError, OSError):
        print("Failed to read file")
        sys.exit()

    # The with block closes the file on every path, including sys.exit().
    with text_file:
        try:
            counts = count_letters(text_file)
        except (UnicodeDecodeError, IOError, OSError):
            print("I'm sorry, but I'm not prepared for this kind of text")
            sys.exit()

    frec = frequency_order(counts)
    print(frec)

    language = u""
    if len(argv) == 3:
        found = language_frequency(argv[2])
        if found is None:
            print("Language not registered")
        else:
            language = found

    # The second line deliberately has no trailing newline.
    with io.open("output.txt", "w", encoding="utf-8") as output:
        output.write(frec + u"\n")
        output.write(language)


if __name__ == "__main__":
    main()