Skip to content

Commit bdeca81

Browse files
authored
Add files via upload
1 parent 60a5a4a commit bdeca81

File tree

2 files changed

+398
-0
lines changed

2 files changed

+398
-0
lines changed

cnn_keras.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import numpy as np
2+
import pickle
3+
import cv2, os
4+
from glob import glob
5+
from keras import optimizers
6+
from keras.models import Sequential
7+
from keras.layers import Dense
8+
from keras.layers import Dropout
9+
from keras.layers import Flatten
10+
from keras.layers.convolutional import Conv2D
11+
from keras.layers.convolutional import MaxPooling2D
12+
from keras.utils import np_utils
13+
from keras.callbacks import ModelCheckpoint
14+
from keras import backend as K
15+
# Select the 'tf' image dimension ordering; the model below is fed inputs
# shaped (image_x, image_y, 1).
K.set_image_dim_ordering('tf')

# Ask TensorFlow's native logging to stay quiet.
# NOTE(review): this is assigned after the keras imports above; the variable
# is normally read when TensorFlow is first loaded -- confirm it still has
# the intended effect at this point.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
18+
19+
def get_image_size():
    """Return the ``shape`` of the reference gesture image.

    The sample ``gestures/1/100.jpg`` is read as grayscale (flag ``0``) and
    its ``shape`` tuple is returned; the module unpacks it into
    ``image_x, image_y``.

    Raises:
        FileNotFoundError: if the sample image is missing or cannot be
            decoded (``cv2.imread`` reports that by returning ``None``).
    """
    sample_path = 'gestures/1/100.jpg'
    img = cv2.imread(sample_path, 0)
    if img is None:
        # Fail with a clear message instead of "'NoneType' has no attribute 'shape'".
        raise FileNotFoundError("Could not read reference image: " + sample_path)
    return img.shape
22+
23+
def get_num_of_classes():
    """Return the number of gesture classes.

    Each class is one sub-directory of ``gestures/``. Only directories are
    counted, so a stray file dropped into ``gestures/`` does not inflate the
    size of the network's output layer.
    """
    return sum(1 for entry in glob('gestures/*') if os.path.isdir(entry))
25+
26+
# Dimensions of one training image, unpacked from the reference image's shape;
# used for the network's input_shape and for reshaping the data sets.
# NOTE(review): a grayscale cv2 shape is presumably (rows, cols), so image_x
# would be the height -- confirm before relying on the x/y naming.
image_x, image_y = get_image_size()
27+
28+
def cnn_model():
    """Build and compile the gesture-classification network.

    The network is three Conv2D/MaxPooling2D stages, then Flatten, a
    128-unit dense layer, dropout, and a softmax layer with one unit per
    gesture class. It is compiled with SGD and categorical cross-entropy.

    Returns:
        tuple: ``(model, callbacks_list)`` where ``callbacks_list`` holds a
        single ``ModelCheckpoint`` that writes ``cnn_model_keras2.h5``
        whenever the monitored ``val_acc`` improves.
    """
    class_count = get_num_of_classes()

    net = Sequential()
    stack = [
        Conv2D(16, (2, 2), input_shape=(image_x, image_y, 1), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(3, 3), strides=(3, 3), padding='same'),
        Conv2D(64, (5, 5), activation='relu'),
        MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same'),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(class_count, activation='softmax'),
    ]
    for layer in stack:
        net.add(layer)

    net.compile(
        loss='categorical_crossentropy',
        optimizer=optimizers.SGD(lr=1e-2),
        metrics=['accuracy'],
    )

    # Keep only the best weights seen so far, judged by validation accuracy.
    best_checkpoint = ModelCheckpoint(
        "cnn_model_keras2.h5",
        monitor='val_acc',
        verbose=1,
        save_best_only=True,
        mode='max',
    )
    return net, [best_checkpoint]
49+
50+
def _load_pickled_array(path, dtype=None):
    """Unpickle the file at ``path`` and return its contents as a NumPy array."""
    # NOTE: pickle executes code embedded in the file; only load data sets
    # that were produced locally.
    with open(path, "rb") as f:
        return np.array(pickle.load(f), dtype=dtype)


def train():
    """Train the CNN on the pickled data sets and print the validation error.

    Reads ``train_images``, ``train_labels``, ``val_images`` and
    ``val_labels`` from the working directory, reshapes the images to
    ``(n, image_x, image_y, 1)``, one-hot encodes the labels, fits the model
    from ``cnn_model()`` for 15 epochs with batch size 500, and prints the
    error rate measured on the validation set.
    """
    # One-hot width is pinned to the class count the model is built with, so
    # a split that lacks the highest label still matches the output layer.
    num_classes = get_num_of_classes()

    train_images = _load_pickled_array("train_images")
    train_labels = _load_pickled_array("train_labels", dtype=np.int32)
    val_images = _load_pickled_array("val_images")
    val_labels = _load_pickled_array("val_labels", dtype=np.int32)

    train_images = np.reshape(train_images, (train_images.shape[0], image_x, image_y, 1))
    val_images = np.reshape(val_images, (val_images.shape[0], image_x, image_y, 1))
    train_labels = np_utils.to_categorical(train_labels, num_classes)
    val_labels = np_utils.to_categorical(val_labels, num_classes)

    print(val_labels.shape)

    model, callbacks_list = cnn_model()
    model.summary()
    model.fit(
        train_images,
        train_labels,
        validation_data=(val_images, val_labels),
        epochs=15,
        batch_size=500,
        callbacks=callbacks_list,
    )
    scores = model.evaluate(val_images, val_labels, verbose=0)
    # scores[1] is the 'accuracy' metric requested in model.compile().
    print("CNN Error: %.2f%%" % (100-scores[1]*100))
74+
75+
# Train the model, then clear the Keras backend session.
train()
K.clear_session()

fun_util.py

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
import cv2, pickle
2+
import numpy as np
3+
import tensorflow as tf
4+
from cnn_tf import cnn_model_fn
5+
import os
6+
import sqlite3, pyttsx3
7+
from keras.models import load_model
8+
from threading import Thread
9+
10+
# Text-to-speech engine shared by say_text(); 'rate' property set to 150.
engine = pyttsx3.init()
engine.setProperty('rate', 150)

# Ask TensorFlow's native logging to stay quiet.
# NOTE(review): set after `import tensorflow` above; the variable is normally
# read when TensorFlow is first loaded -- confirm it still has an effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Trained Keras classifier used by get_pred_from_contour().
model = load_model('cnn_model_keras2.h5')
14+
15+
def get_hand_hist():
    """Load and return the object pickled in the ``hist`` file."""
    with open("hist", "rb") as hist_file:
        return pickle.load(hist_file)
19+
20+
def get_image_size():
    """Return the ``shape`` of the reference gesture image.

    The sample ``gestures/0/100.jpg`` is read as grayscale (flag ``0``) and
    its ``shape`` tuple is returned; the module unpacks it into
    ``image_x, image_y``.

    Raises:
        FileNotFoundError: if the sample image is missing or cannot be
            decoded (``cv2.imread`` reports that by returning ``None``).
    """
    sample_path = 'gestures/0/100.jpg'
    img = cv2.imread(sample_path, 0)
    if img is None:
        # Fail with a clear message instead of "'NoneType' has no attribute 'shape'".
        raise FileNotFoundError("Could not read reference image: " + sample_path)
    return img.shape
23+
24+
# Dimensions the classifier input is resized/reshaped to in
# keras_process_image(), unpacked from the reference image's shape.
# NOTE(review): a grayscale cv2 shape is presumably (rows, cols), so image_x
# would be the height -- confirm before relying on the x/y naming.
image_x, image_y = get_image_size()
25+
26+
def keras_process_image(img):
    """Convert a single-channel image into a one-sample batch for the model.

    Args:
        img: image array to classify (callers pass the cropped threshold mask).

    Returns:
        float32 array of shape ``(1, image_x, image_y, 1)``.
    """
    # cv2.resize takes dsize as (width, height) = (columns, rows), while
    # image_x/image_y come from an image ``shape`` and are (rows, columns).
    # Passing them swapped keeps square images unchanged and stops non-square
    # ones from being scrambled by the reshape below.
    img = cv2.resize(img, (image_y, image_x))
    img = np.array(img, dtype=np.float32)
    return np.reshape(img, (1, image_x, image_y, 1))
31+
32+
def keras_predict(model, image):
    """Classify ``image`` with ``model``.

    Args:
        model: object exposing ``predict`` (the loaded Keras model).
        image: image accepted by ``keras_process_image``.

    Returns:
        tuple: ``(highest probability, index of that probability)`` taken
        from the first row of the model's prediction.
    """
    scores = model.predict(keras_process_image(image))[0]
    best_score = max(scores)
    return best_score, list(scores).index(best_score)
37+
38+
def get_pred_text_from_db(pred_class):
    """Look up the gesture name stored for a predicted class id.

    Args:
        pred_class: class index (anything ``int()`` accepts).

    Returns:
        The ``g_name`` of the first row in ``gesture`` (database file
        ``gesture_db.db``) whose ``g_id`` equals ``pred_class``, or ``None``
        when there is no such row.
    """
    conn = sqlite3.connect("gesture_db.db")
    try:
        # Bound parameter instead of string concatenation; int() also turns
        # NumPy integer scalars into a type sqlite3 can bind.
        row = conn.execute(
            "SELECT g_name FROM gesture WHERE g_id = ?", (int(pred_class),)
        ).fetchone()
    finally:
        # The connection was previously left open on every call.
        conn.close()
    return row[0] if row is not None else None
44+
45+
def get_pred_from_contour(contour, thresh):
    """Classify the hand region enclosed by ``contour``.

    The contour's bounding box is cropped out of ``thresh``, padded with
    black borders on its shorter axis, and passed to the module-level Keras
    ``model``.

    Args:
        contour: contour whose bounding rectangle selects the crop.
        thresh: single-channel threshold image the contour was found in.

    Returns:
        str: the gesture name from the database when the top probability
        exceeds 70 %, otherwise ``""``. A class id that has no database row
        also yields ``""`` rather than ``None``, because callers concatenate
        the result and call string methods on it.
    """
    x1, y1, w1, h1 = cv2.boundingRect(contour)
    save_img = thresh[y1:y1+h1, x1:x1+w1]

    # Pad the shorter axis equally on both sides (same amounts as before:
    # floor of half the difference).
    if w1 > h1:
        pad = (w1 - h1) // 2
        save_img = cv2.copyMakeBorder(save_img, pad, pad, 0, 0, cv2.BORDER_CONSTANT, (0, 0, 0))
    elif h1 > w1:
        pad = (h1 - w1) // 2
        save_img = cv2.copyMakeBorder(save_img, 0, 0, pad, pad, cv2.BORDER_CONSTANT, (0, 0, 0))

    pred_probab, pred_class = keras_predict(model, save_img)
    if pred_probab*100 > 70:
        return get_pred_text_from_db(pred_class) or ""
    return ""
57+
58+
# Digit gesture -> calculator operator, used while the operator is being entered.
_OPERATOR_BY_DIGIT = {
    1: "+",
    2: "-",
    3: "*",
    4: "/",
    5: "%",
    6: "**",
    7: ">>",
    8: "<<",
    9: "&",
    0: "|",
}


def get_operator(pred_text):
    """Map a predicted digit to its calculator operator.

    Args:
        pred_text: predicted text; anything ``int()`` can convert.

    Returns:
        str: the operator for digits 0-9, or ``""`` when ``pred_text`` is
        not convertible to an integer or is outside that range.
    """
    try:
        digit = int(pred_text)
    except (TypeError, ValueError, OverflowError):
        # Not a number at all (e.g. a letter gesture, None or "").
        return ""
    return _OPERATOR_BY_DIGIT.get(digit, "")
85+
86+
# Histogram back-projected onto each frame in get_img_contour_thresh().
hist = get_hand_hist()

# Region of interest inside the frame: top-left corner (x, y) and size (w, h).
# The threshold image is cropped to it and it is drawn as a green rectangle.
x, y = 300, 100
w, h = 300, 300

# When False, say_text() returns without speaking; toggled with the 'v' key.
is_voice_on = True
89+
90+
def get_img_contour_thresh(img):
    """Segment the hand in a camera frame and find its contours.

    The frame is mirrored, converted to HSV, and the module-level ``hist`` is
    back-projected on channels 0 and 1. The result is smoothed, binarised
    with Otsu's threshold and cropped to the region of interest
    ``(x, y, w, h)``.

    Args:
        img: BGR frame.

    Returns:
        tuple: ``(mirrored frame, contours found in the cropped mask,
        cropped mask)``.
    """
    img = cv2.flip(img, 1)
    imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1)

    # Smooth the back projection in place with an elliptical kernel, then blur.
    disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    cv2.filter2D(dst, -1, disc, dst)
    blur = cv2.GaussianBlur(dst, (11, 11), 0)
    blur = cv2.medianBlur(blur, 15)

    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # NOTE(review): merging to three channels and converting straight back to
    # gray looks like a no-op for a binary mask; kept as in the original.
    thresh = cv2.merge((thresh, thresh, thresh))
    thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
    thresh = thresh[y:y+h, x:x+w]

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in
    # every case, whereas index 0 is the image on 3.x.
    contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    return img, contours, thresh
104+
105+
import threading

# Serialises access to the shared text-to-speech engine across the worker
# threads that call say_text().
_speech_lock = threading.Lock()


def say_text(text):
    """Speak ``text`` through the shared engine, unless voice is switched off.

    Callers run this on short-lived threads. Each call blocks until the
    engine is free and then until its own utterance has been processed by
    ``runAndWait``.

    Args:
        text: the string to speak.
    """
    if not is_voice_on:
        return
    # A lock replaces the former busy-wait on the private ``engine._inLoop``
    # flag, which spun a CPU core and let two threads pass the check together.
    with _speech_lock:
        engine.say(text)
        engine.runAndWait()
112+
113+
# Symbol -> spoken words, applied in this order ('**' must precede '*').
_SPOKEN_OPERATORS = (
    ('-', ' minus '),
    ('/', ' divided by '),
    ('**', ' raised to the power '),
    ('*', ' multiplied by '),
    ('%', ' mod '),
    ('>>', ' bitwise right shift '),
    ('<<', ' bitwise left shift '),
    ('&', ' bitwise and '),
    ('|', ' bitwise or '),
)


def _calc_text_to_speech(calc_text):
    """Return ``calc_text`` with operator symbols replaced by spoken words."""
    speech = calc_text
    for symbol, words in _SPOKEN_OPERATORS:
        speech = speech.replace(symbol, words)
    return speech


def calculator_mode(cam):
    """Run the gesture-driven calculator until the user leaves the mode.

    Digits are entered by holding a numeric gesture for more than 15
    consecutive frames. The "Best of Luck " gesture confirms the current
    step (first number, then the result once a second number exists), and
    "C" held for more than 5 frames clears everything. While the operator is
    expected, a digit gesture is translated through ``get_operator``.

    Keys: ``q`` quits, ``t`` switches to text mode, ``v`` toggles voice.

    Args:
        cam: opened ``cv2.VideoCapture``-like object.

    Returns:
        int: ``1`` if the loop ended because ``t`` was pressed, otherwise
        ``0`` (also when a frame could not be read from the camera).
    """
    global is_voice_on

    flag = {"first": False, "operator": False, "second": False, "clear": False}
    count_same_frames = 0
    first, operator, second = "", "", ""
    pred_text = ""
    calc_text = ""
    info = "Enter first number"
    Thread(target=say_text, args=(info,)).start()
    # NOTE(review): nothing in this function ever increments this counter, so
    # the "== 30" reset below cannot trigger -- confirm the intended behaviour.
    count_clear_frames = 0
    keypress = -1

    def reset_calculation():
        """Clear the expression and all progress flags; prompt for a new one."""
        nonlocal first, second, operator, pred_text, calc_text, info
        first, second, operator, pred_text, calc_text = '', '', '', '', ''
        for step in flag:
            flag[step] = False
        info = "Enter first number"
        Thread(target=say_text, args=(info,)).start()

    while True:
        grabbed, img = cam.read()
        if not grabbed:
            # Camera delivered no frame; leave the mode instead of crashing in resize.
            break
        img = cv2.resize(img, (640, 480))
        img, contours, thresh = get_img_contour_thresh(img)
        old_pred_text = pred_text

        if len(contours) > 0:
            contour = max(contours, key=cv2.contourArea)
            if cv2.contourArea(contour) > 10000:
                # "or ''" guards against a missing prediction (None).
                pred_text = get_pred_from_contour(contour, thresh) or ""
                if old_pred_text == pred_text:
                    count_same_frames += 1
                else:
                    count_same_frames = 0

                if pred_text == "C":
                    if count_same_frames > 5:
                        count_same_frames = 0
                        reset_calculation()

                elif pred_text == "Best of Luck " and count_same_frames > 15:
                    count_same_frames = 0
                    if flag['clear']:
                        reset_calculation()
                    elif second != '':
                        flag['second'] = True
                        info = "Clear screen"
                        second = ''
                        flag['clear'] = True
                        try:
                            # SECURITY NOTE: eval() on a built-up string. The text
                            # is assembled only from recognised digits and the
                            # operators of get_operator(); names and builtins are
                            # withheld as a precaution.
                            calc_text += "= " + str(eval(calc_text, {"__builtins__": {}}, {}))
                        except Exception:
                            # e.g. division by zero or a malformed expression.
                            calc_text = "Invalid operation"
                        if is_voice_on:
                            speech = _calc_text_to_speech(calc_text)
                            Thread(target=say_text, args=(speech,)).start()
                    elif first != '':
                        flag['first'] = True
                        info = "Enter operator"
                        Thread(target=say_text, args=(info,)).start()
                        first = ''

                elif pred_text.isnumeric():
                    if not flag['first']:
                        if count_same_frames > 15:
                            count_same_frames = 0
                            Thread(target=say_text, args=(pred_text,)).start()
                            first += pred_text
                            calc_text += pred_text
                    elif not flag['operator']:
                        # Shown live on the blackboard until it is committed.
                        operator = get_operator(pred_text)
                        if count_same_frames > 15:
                            count_same_frames = 0
                            flag['operator'] = True
                            calc_text += operator
                            info = "Enter second number"
                            Thread(target=say_text, args=(info,)).start()
                            operator = ''
                    elif not flag['second']:
                        if count_same_frames > 15:
                            Thread(target=say_text, args=(pred_text,)).start()
                            second += pred_text
                            calc_text += pred_text
                            count_same_frames = 0

        if count_clear_frames == 30:
            reset_calculation()
            count_clear_frames = 0

        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
        cv2.putText(blackboard, "Calculator Mode", (100, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0, 0))
        cv2.putText(blackboard, "Predicted text- " + pred_text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))
        cv2.putText(blackboard, "Operator " + operator, (30, 140), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127))
        cv2.putText(blackboard, calc_text, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
        cv2.putText(blackboard, info, (30, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 255, 255))
        # Both branches currently draw the same blank label.
        if is_voice_on:
            cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
        else:
            cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        res = np.hstack((img, blackboard))
        cv2.imshow("Recognizing gesture", res)
        cv2.imshow("thresh", thresh)

        keypress = cv2.waitKey(1)
        if keypress in (ord('q'), ord('t')):
            break
        if keypress == ord('v'):
            is_voice_on = not is_voice_on

    return 1 if keypress == ord('t') else 0
236+
237+
def text_mode(cam):
    """Run gesture-to-text recognition until the user leaves the mode.

    A gesture held for more than 20 consecutive frames is appended to the
    current word (single-character gestures are also spoken). When the hand
    leaves the region of interest (no contour, or the largest contour has an
    area below 1000) the accumulated word is spoken and cleared.

    Keys: ``q`` quits, ``c`` switches to calculator mode, ``v`` toggles voice.

    Args:
        cam: opened ``cv2.VideoCapture``-like object.

    Returns:
        int: ``2`` if the loop ended because ``c`` was pressed, otherwise
        ``0`` (also when a frame could not be read from the camera).
    """
    global is_voice_on

    text = ""
    word = ""
    count_same_frame = 0
    keypress = -1

    while True:
        grabbed, img = cam.read()
        if not grabbed:
            # Camera delivered no frame; leave the mode instead of crashing in resize.
            break
        img = cv2.resize(img, (640, 480))
        img, contours, thresh = get_img_contour_thresh(img)
        old_text = text

        if len(contours) > 0:
            contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(contour)
        else:
            # No contour at all is handled like a vanishingly small hand.
            contour, area = None, 0

        if area > 10000:
            # "or ''" guards against a missing prediction (None).
            text = get_pred_from_contour(contour, thresh) or ""
            if old_text == text:
                count_same_frame += 1
            else:
                count_same_frame = 0

            if count_same_frame > 20:
                if len(text) == 1:
                    Thread(target=say_text, args=(text,)).start()
                word = word + text
                # The combined 'I/Me ' gesture reads as 'I ' when the word
                # starts with it, and as 'me ' when the word ends with it.
                if word.startswith('I/Me '):
                    word = word.replace('I/Me ', 'I ')
                elif word.endswith('I/Me '):
                    word = word.replace('I/Me ', 'me ')
                count_same_frame = 0

        elif area < 1000:
            # Hand withdrawn: speak what was collected and start over.
            if word != '':
                Thread(target=say_text, args=(word,)).start()
            text = ""
            word = ""

        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
        cv2.putText(blackboard, " ", (180, 50), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 0, 0))
        cv2.putText(blackboard, "Predicted text- " + text, (30, 100), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))
        cv2.putText(blackboard, word, (30, 240), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
        # Both branches currently draw the same blank label.
        if is_voice_on:
            cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
        else:
            cv2.putText(blackboard, " ", (450, 440), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 127, 0))
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        res = np.hstack((img, blackboard))
        cv2.imshow("Recognizing gesture", res)
        cv2.imshow("thresh", thresh)

        keypress = cv2.waitKey(1)
        if keypress in (ord('q'), ord('c')):
            break
        if keypress == ord('v'):
            is_voice_on = not is_voice_on

    return 2 if keypress == ord('c') else 0
304+
305+
def recognize():
    """Open a camera and alternate between text and calculator mode.

    Camera index 1 is tried first; if it yields no frame, index 0 is used.
    Recognition starts in text mode. Each mode returns the code of the mode
    to run next (``1`` text, ``2`` calculator); any other code ends the loop.
    The camera and the OpenCV windows are released on the way out.
    """
    cam = cv2.VideoCapture(1)
    if not cam.read()[0]:
        # Give the unusable handle back before falling back to device 0.
        cam.release()
        cam = cv2.VideoCapture(0)

    next_mode = 1
    try:
        while True:
            if next_mode == 1:
                next_mode = text_mode(cam)
            elif next_mode == 2:
                next_mode = calculator_mode(cam)
            else:
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()
320+
321+
# Run one prediction on a blank 50x50 image before the camera loop starts
# (presumably to absorb the model's first-call cost -- TODO confirm), then
# start recognition.
keras_predict(model, np.zeros((50, 50), dtype=np.uint8))
recognize()

0 commit comments

Comments
 (0)