# blink_detector.py
# Webcam blink detector: blinks are decoded as Morse code, then spoken and typed.
import cv2
import dlib
import numpy as np
from keras.models import load_model
from scipy.spatial import distance as dist
from imutils import face_utils
from pyautogui import typewrite
import pyttsx3
# Module-level setup: the text-to-speech engine used to read decoded messages
# aloud, followed by the landmark predictor and the face cascade used for
# eye localisation. All of this runs at import time.
engine = pyttsx3.init() # create the shared text-to-speech engine
""" RATE"""
rate = engine.getProperty('rate') # engine's speaking rate before it is overridden below
print (rate) # log the original speaking rate
engine.setProperty('rate', 150) # override the speaking rate with 150
"""VOLUME"""
volume = engine.getProperty('volume') # engine's volume before it is overridden (min=0 and max=1)
print (volume) # log the original volume level
engine.setProperty('volume',1.0) # set the volume to the maximum of the 0..1 range
"""VOICE"""
voices = engine.getProperty('voices') # voices reported by the engine
# Changing the index below selects a different voice.
engine.setProperty('voice', voices[0].id) # selects index 0; which voice that is depends on the installed voices - TODO confirm
# Both model files are referenced by bare file name.
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')
morse_string = ''

# Character -> Morse sequence. Insertion order is significant for any code
# that looks entries up by position, so it must be preserved.
MORSE_CODE_DICT = {
    # letters
    'A': '.-', 'B': '-...', 'C': '-.-.', 'D': '-..',
    'E': '.', 'F': '..-.', 'G': '--.', 'H': '....',
    'I': '..', 'J': '.---', 'K': '-.-', 'L': '.-..',
    'M': '--', 'N': '-.', 'O': '---', 'P': '.--.',
    'Q': '--.-', 'R': '.-.', 'S': '...', 'T': '-',
    'U': '..-', 'V': '...-', 'W': '.--', 'X': '-..-',
    'Y': '-.--', 'Z': '--..',
    # digits
    '1': '.----', '2': '..---', '3': '...--', '4': '....-',
    '5': '.....', '6': '-....', '7': '--...', '8': '---..',
    '9': '----.', '0': '-----',
    # punctuation
    # NOTE(review): the comma key is a comma followed by a space, so
    # '--..--' decodes to ', ' rather than ',' - confirm this is intended.
    ', ': '--..--',
    '.': '.-.-.-',
    '?': '..--..',
    '/': '-..-.',
    '-': '-....-',
    '(': '-.--.',
    ')': '-.--.-',
    # the empty sequence maps to the empty string
    '': '',
}
def detect(img, cascade=face_cascade, minimumFeatureSize=(20, 20)):
    """Locate face rectangles in *img* using a Haar cascade.

    :param img: image handed to ``cascade.detectMultiScale``
    :param cascade: cascade classifier to run; defaults to the module-level
        ``face_cascade``
    :param minimumFeatureSize: passed through as ``minSize``
    :return: an empty list when nothing is detected, otherwise the detection
        array with its last two columns converted from (width, height) to
        (maxX, maxY)
    :raises Exception: if the cascade reports that it is empty
    """
    if cascade.empty():
        raise (Exception("There was a problem loading your Haar Cascade xml file."))

    boxes = cascade.detectMultiScale(
        img,
        scaleFactor=1.3,
        minNeighbors=1,
        minSize=minimumFeatureSize,
    )

    # no detections: hand back a zero-length list
    if not len(boxes):
        return []

    # turn (x, y, width, height) rows into (x, y, maxX, maxY), in place
    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
    return boxes
def _crop_eye(gray, eye, width=34, height=26):
    """Cut a window of about ``width`` x ``height`` pixels centred on one eye.

    :param gray: grayscale frame to crop from
    :param eye: the six (x, y) landmarks of one eye; index 0 and 3 are used
        as the horizontal extremes, 1-2 as the upper lid and 4-5 as the
        lower lid
    :return: the cropped region, or None when the window is empty or starts
        outside the frame
    """
    upper_y = min(eye[1:3, 1])
    lower_y = max(eye[4:, 1])
    # pad the landmark box equally on both sides up to the target size
    pad_y = (height - abs(upper_y - lower_y)) / 2
    pad_x = (width - (eye[3][0] - eye[0][0])) / 2
    x0, y0, x1, y1 = np.rint([eye[0][0] - pad_x, upper_y - pad_y,
                              eye[3][0] + pad_x, lower_y + pad_y]).astype(int)
    # A negative start would be read by numpy as "count from the far edge";
    # reject it explicitly instead of relying on that yielding an empty slice.
    if x0 < 0 or y0 < 0:
        return None
    window = gray[y0:y1, x0:x1]
    if 0 in window.shape:
        return None
    return window


def cropEyes(frame):
    """Extract both eye regions from a BGR frame for the blink classifier.

    The frame is converted to grayscale, a face is detected, facial landmarks
    are predicted inside it, and a window around each eye is cropped and
    resized with ``cv2.resize(..., (34, 26))``. When several faces are
    detected the one with the largest area is used.

    :param frame: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``)
    :return: ``(left_eye_image, right_eye_image)``, the right one mirrored
        horizontally, or None when no face is found or either eye window
        cannot be cropped
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # detect faces on the grayscale image; rows are (x, y, maxX, maxY)
    faces = detect(gray, minimumFeatureSize=(80, 80))
    if len(faces) == 0:
        return None

    # keep the biggest face (with a single detection this is that detection)
    face = max(
        faces,
        key=lambda box: (int(box[2]) - int(box[0])) * (int(box[3]) - int(box[1])),
    )
    face_rect = dlib.rectangle(left=int(face[0]), top=int(face[1]),
                               right=int(face[2]), bottom=int(face[3]))

    # determine the facial landmarks for the face region
    shape = face_utils.shape_to_np(predictor(gray, face_rect))

    # The names are deliberately crossed, exactly as before: "left" here uses
    # the landmark range labelled "right_eye" and vice versa.
    # NOTE(review): presumably this converts subject-relative labels to
    # image-relative ones - confirm against the imutils landmark map.
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    left_eye_image = _crop_eye(gray, shape[lStart:lEnd])
    right_eye_image = _crop_eye(gray, shape[rStart:rEnd])

    # if either eye could not be cropped, report no detection
    if left_eye_image is None or right_eye_image is None:
        return None

    # resize for the conv net; the right eye is mirrored horizontally
    left_eye_image = cv2.resize(left_eye_image, (34, 26))
    right_eye_image = cv2.resize(right_eye_image, (34, 26))
    right_eye_image = cv2.flip(right_eye_image, 1)
    return left_eye_image, right_eye_image
def cnnPreprocess(img):
    """Put an eye image into the format used at training time.

    The pixels are converted to float32 and divided by 255, then a new axis
    is inserted at position 2 and another at position 0. The input array is
    not modified.

    :param img: numpy array holding the eye image
    :return: a new float32 array with the two extra axes
    """
    scaled = img.astype('float32') / np.float32(255)
    with_channel = np.expand_dims(scaled, axis=2)
    return with_channel[np.newaxis, ...]
def decode(msg: str):
    """
    Map a pair of blink lengths to one Morse symbol.

    :param msg: string of length 2 made of 'L' (long) and 'S' (short)
    :return: '-' for 'LL', '.' for 'LS', and a space for anything else
    """
    for pattern, symbol in (('LL', '-'), ('LS', '.')):
        if msg == pattern:
            return symbol
    # 'SS' and every unrecognised pair both come out as a space
    return ' '
def decrypt(message, table=None):
    """Translate a space-delimited Morse string into text.

    A single space ends the current character; a second consecutive space
    adds a word-separating space to the output. A final character that is
    not followed by a space is decoded as well.

    :param message: string of Morse symbols and spaces
    :param table: optional mapping of character -> Morse sequence; defaults
        to the module-level ``MORSE_CODE_DICT``
    :return: the decoded text
    :raises ValueError: if a sequence in *message* is not in the table
    """
    if table is None:
        table = MORSE_CODE_DICT

    # Build the sequence -> character map once. setdefault keeps the first
    # key for a duplicated sequence, matching a first-match positional lookup.
    reverse = {}
    for char, code in table.items():
        reverse.setdefault(code, char)

    def lookup(symbol):
        # an empty sequence (e.g. a leading space) contributes nothing
        if not symbol:
            return ''
        try:
            return reverse[symbol]
        except KeyError:
            raise ValueError(
                "Unknown Morse sequence: {!r}".format(symbol)) from None

    decoded = []
    symbol = ''
    spaces = 0
    for letter in message:
        if letter != ' ':
            # still inside a character: reset the space run and accumulate
            spaces = 0
            symbol += letter
            continue
        spaces += 1
        if spaces == 2:
            # second space in a row marks a new word
            decoded.append(' ')
        else:
            decoded.append(lookup(symbol))
            symbol = ''

    # flush a trailing character that had no terminating space
    decoded.append(lookup(symbol))
    return ''.join(decoded)
def _blink_symbol(closed_frames):
    """Classify a finished eye closure by the number of frames it lasted.

    :return: None for fewer than 2 frames (ignored), 'S' for 2 to 8 frames,
        'L' for 9 frames or more
    """
    if closed_frames < 2:
        return None
    if closed_frames < 9:
        return 'S'
    return 'L'


def _speak_and_type(morse):
    """Decode *morse*, then print, speak and type the resulting text."""
    try:
        text = decrypt(morse)
    except ValueError:
        # an undecodable sequence must not abort the capture loop
        return
    print(text)
    engine.say(text)
    engine.runAndWait()
    engine.stop()
    typewrite(text)


def main():
    """Run the webcam loop that turns blinks into spoken and typed text.

    Each frame is classified as eyes open or closed (model output above 0.5
    means open). A run of closed frames followed by an open frame is one
    blink: short ('S') or long ('L'). Every two blinks are decoded into one
    Morse symbol, and once the Morse buffer ends with two spaces it is
    decoded, spoken, typed and cleared. The loop ends when `q` is pressed or
    the camera stops delivering frames.
    """
    # open the camera, load the cnn model
    camera = cv2.VideoCapture(0)
    try:
        model = load_model('blinkModel.hdf5')

        # blinks is the total number of blinks, close_counter the number of
        # consecutive frames classified as closed
        close_counter = blinks = 0
        message = ''
        morse = ''

        while True:
            ret, frame = camera.read()
            if not ret or frame is None:
                # camera unavailable or stream ended
                break

            state = ''
            eyes = cropEyes(frame)
            if eyes is not None:
                left_eye, right_eye = eyes
                # average the predictions of the two eyes
                prediction = (model.predict(cnnPreprocess(left_eye))
                              + model.predict(cnnPreprocess(right_eye))) / 2.0
                state = 'open' if prediction > 0.5 else 'close'

                if state == 'close':
                    close_counter += 1
                else:
                    # eyes reopened: classify the closure that just ended
                    symbol = _blink_symbol(close_counter)
                    close_counter = 0
                    if symbol is not None:
                        blinks += 1
                        message += symbol
                    if len(message) == 2:
                        morse += decode(message)
                        message = ''
                        # two trailing spaces mark the end of the input
                        if morse.endswith('  '):
                            _speak_and_type(morse)
                            morse = ''

            # Draw and show every frame (not only the ones that complete a
            # blink) so the preview stays live and `q` is always honoured.
            cv2.putText(frame, "Blinks: {}".format(blinks), (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.putText(frame, "State: {}".format(state), (300, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.imshow('blinks counter', frame)
            key = cv2.waitKey(1) & 0xFF

            # if the `q` key was pressed, break from the loop
            if key == ord('q'):
                break
    finally:
        # clean up even if the loop exits through an exception
        camera.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()