Skip to content

Commit 77380c7

Browse files
committed
Lint the code, improve casing.
1 parent ac13ac8 commit 77380c7

File tree

1 file changed

+44
-48
lines changed

1 file changed

+44
-48
lines changed

autoBOTLib/features/features_reading_comperhension.py

Lines changed: 44 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
import logging
2-
3-
logging.basicConfig(format='%(asctime)s - %(message)s',
4-
datefmt='%d-%b-%y %H:%M:%S')
5-
logging.getLogger().setLevel(logging.INFO)
6-
1+
import math
2+
import re
73
import pandas as pd
84
import numpy as np
95
import tqdm
106
import nltk
117
from nltk import sent_tokenize, regexp_tokenize
12-
import math
13-
import re
8+
9+
import logging
10+
11+
logging.basicConfig(format='%(asctime)s - %(message)s',
12+
datefmt='%d-%b-%y %H:%M:%S')
13+
logging.getLogger().setLevel(logging.INFO)
1414

1515
def sylco(word):
1616
word = word.lower()
@@ -26,12 +26,12 @@ def sylco(word):
2626
# 2) if doesn't end with "ted" or "tes" or "ses" or "ied" or "ies", discard "es" and "ed" at the end.
2727
# if it has only 1 vowel or 1 set of consecutive vowels, discard. (like "speed", "fled" etc.)
2828
# 4) check if consecutive vowels exists, triplets or pairs, count them as one.
29-
doubleAndtripple = len(re.findall(r'[eaoui][eaoui]', word))
29+
double_and_triple = len(re.findall(r'[eaoui][eaoui]', word))
3030
tripple = len(re.findall(r'[eaoui][eaoui][eaoui]', word))
31-
disc += doubleAndtripple + tripple
31+
disc += double_and_triple + tripple
3232

3333
# 5) count remaining vowels in word.
34-
numVowels = len(re.findall(r'[eaoui]', word))
34+
num_vowels = len(re.findall(r'[eaoui]', word))
3535

3636
# 9) if starts with "tri-" or "bi-" and is followed by a vowel, add one.
3737
if word[:3] == "tri" and len(word) > 3 and word[3] in "aeoui":
@@ -45,113 +45,109 @@ def sylco(word):
4545
# (These rules would be added if needed.)
4646

4747
# calculate the output
48-
return numVowels - disc + syls
48+
return num_vowels - disc + syls
4949

5050

5151
def gfi(text):
    """Gunning Fog Index of *text*; returns 0 for empty input.

    Uses words longer than 7 characters as a proxy for "complex" words.
    """
    words = regexp_tokenize(text, r'\w+')
    sentences = sent_tokenize(text)
    n_words, n_sents = len(words), len(sentences)

    # No words or no sentences -> readability is undefined; report 0.
    if not n_sents or not n_words:
        return 0

    # Percentage of long ("complex") words.
    pct_long = sum(1 for w in words if len(w) > 7) / n_words * 100
    return 0.4 * ((n_words / n_sents) + pct_long)
6665

6766

6867
def fre(text):
    """Flesch Reading Ease of *text*; returns 0 for empty input.

    Syllables per word are estimated with the module-level ``sylco`` helper.
    """
    words = regexp_tokenize(text, r'\w+')
    sentences = sent_tokenize(text)
    n_words, n_sents = len(words), len(sentences)

    # No words or no sentences -> readability is undefined; report 0.
    if not n_sents or not n_words:
        return 0

    total_syllables = sum(sylco(w) for w in words)
    return 206.835 - 1.015 * (n_words / n_sents) \
        - 84.6 * (total_syllables / n_words)
8583

8684

8785
def fkgl(text):
    """Flesch-Kincaid Grade Level of *text*; returns 0 for empty input.

    Syllables per word are estimated with the module-level ``sylco`` helper.
    """
    words = regexp_tokenize(text, r'\w+')
    sentences = sent_tokenize(text)
    n_words, n_sents = len(words), len(sentences)

    # No words or no sentences -> readability is undefined; report 0.
    if not n_sents or not n_words:
        return 0

    total_syllables = sum(sylco(w) for w in words)
    return 0.39 * (n_words / n_sents) \
        + 11.8 * (total_syllables / n_words) - 15.59
104101

105102

106103
def dcrf(text):
    """Dale-Chall readability score of *text*; returns 0 for empty input.

    NOTE(review): words longer than 7 characters stand in for the
    Dale-Chall "difficult word" list used by the textbook formula.
    """
    words = regexp_tokenize(text, r'\w+')
    sentences = sent_tokenize(text)
    n_words, n_sents = len(words), len(sentences)

    # No words or no sentences -> readability is undefined; report 0.
    if not n_sents or not n_words:
        return 0

    # Percentage of long ("difficult") words.
    pct_long = sum(1 for w in words if len(w) > 7) / n_words * 100
    return 0.1579 * pct_long + 0.0496 * (n_words / n_sents)
122118

123119

124120
def ari(text):
    """Automated Readability Index of *text*; returns 0 for empty input.

    Based on characters per word and words per sentence only, so it
    needs no syllable estimation.
    """
    words = regexp_tokenize(text, r'\w+')
    sentences = sent_tokenize(text)
    n_words, n_sents = len(words), len(sentences)
    n_chars = len(text)  # raw character count, whitespace included

    # No words or no sentences -> readability is undefined; report 0.
    if not n_words or not n_sents:
        return 0

    return 4.71 * (n_chars / n_words) + 0.5 * (n_words / n_sents) - 21.43
138134

139135

140136
def smog(text):
    """SMOG Index of *text*; returns 0 when there are no sentences.

    Counts polysyllabic words (strictly more than two syllables, as
    estimated by the module-level ``sylco`` helper) and applies the
    standard SMOG grade formula.
    """
    word_tokens = regexp_tokenize(text, r'\w+')
    length_s = len(sent_tokenize(text))

    # No sentences -> readability is undefined; report 0.
    if length_s == 0:
        return 0

    # Total words with more than 2 syllables.
    polysyllables = sum(1 for word in word_tokens if sylco(word) > 2)

    # snake_case local: the lint pass renamed every other ALLCAPS local
    # in this module but missed this one (was `SMOG`).
    smog_index = 1.043 * math.sqrt(polysyllables * (30 / length_s)) + 3.1291
    return smog_index
156152

157153

@@ -209,7 +205,7 @@ def transform(self, new_documents):
209205
210206
"""
211207

212-
if not type(new_documents) == list:
208+
if type(new_documents) is not list:
213209
new_documents.values.tolist()
214210

215211
if self.verbose:

0 commit comments

Comments
 (0)