diff --git a/HandPose/handPoseImage.py b/HandPose/handPoseImage.py
index bacdae628..0fec7ba28 100644
--- a/HandPose/handPoseImage.py
+++ b/HandPose/handPoseImage.py
@@ -3,69 +3,83 @@ import time
 import numpy as np
 
-protoFile = "hand/pose_deploy.prototxt"
-weightsFile = "hand/pose_iter_102000.caffemodel"
-nPoints = 22
-POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
-net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
-frame = cv2.imread("right-frontal.jpg")
-frameCopy = np.copy(frame)
-frameWidth = frame.shape[1]
-frameHeight = frame.shape[0]
-aspect_ratio = frameWidth/frameHeight
-
-threshold = 0.1
-
-t = time.time()
-# input image dimensions for the network
-inHeight = 368
-inWidth = int(((aspect_ratio*inHeight)*8)//8)
-inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
-
-net.setInput(inpBlob)
-
-output = net.forward()
-print("time taken by network : {:.3f}".format(time.time() - t))
-
-# Empty list to store the detected keypoints
-points = []
-
-for i in range(nPoints):
-    # confidence map of corresponding body's part.
-    probMap = output[0, i, :, :]
-    probMap = cv2.resize(probMap, (frameWidth, frameHeight))
-
-    # Find global maxima of the probMap.
-    minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
-
-    if prob > threshold :
-        cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
-        cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
-
-        # Add the point to the list if the probability is greater than the threshold
-        points.append((int(point[0]), int(point[1])))
-    else :
-        points.append(None)
-
-# Draw Skeleton
-for pair in POSE_PAIRS:
-    partA = pair[0]
-    partB = pair[1]
-
-    if points[partA] and points[partB]:
-        cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2)
-        cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
-        cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
-
-
-cv2.imshow('Output-Keypoints', frameCopy)
-cv2.imshow('Output-Skeleton', frame)
-
-
-cv2.imwrite('Output-Keypoints.jpg', frameCopy)
-cv2.imwrite('Output-Skeleton.jpg', frame)
-
-print("Total time taken : {:.3f}".format(time.time() - t))
-
-cv2.waitKey(0)
+class HandPose:
+    def __init__(self, model, nPoints, pose_pair, input_source):
+        self.input_source = input_source  # the input image file name
+        self.protofile, self.weightfile = model  # network definition and pretrained weights
+        self.nPoints = nPoints  # number of keypoints
+        self.POSE_PAIRS = pose_pair
+        self.threshold = 0.1
+
+    def window_size(self, frame):
+        # derive the network input dimensions from the frame dimensions
+        frameWidth = frame.shape[1]
+        frameHeight = frame.shape[0]
+        aspect_ratio = frameWidth / frameHeight
+        inHeight = 368
+        inWidth = int((aspect_ratio * inHeight) // 8) * 8  # round the width down to a multiple of 8
+
+        return inHeight, inWidth  # input dimensions
+
+    def draw_skeleton(self, frame, frameCopy, output):
+        # take the frame and the output generated by the model, mark the keypoints
+        # on frameCopy and draw the skeleton on frame using the pairs in self.POSE_PAIRS
+        points = []
+        for i in range(self.nPoints):
+            # confidence map of the corresponding keypoint
+            probMap = output[0, i, :, :]  # extract the probability map for the ith keypoint
+            probMap = cv2.resize(probMap, (frame.shape[1], frame.shape[0]))
+            # Find the global maximum of the probMap.
+            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
+
+            if prob > self.threshold:
+                cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1,
+                           lineType=cv2.FILLED)
+                cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1,
+                            (0, 0, 255), 2, lineType=cv2.LINE_AA)
+                # Add the point to the list if the probability is greater than the threshold
+                points.append((int(point[0]), int(point[1])))
+            else:
+                points.append(None)
+
+        for pair in self.POSE_PAIRS:
+            partA = pair[0]
+            partB = pair[1]
+            if points[partA] and points[partB]:
+                # draw the line connecting the points of the pair, then the circles
+                cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2)
+                cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+                cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+
+        return frame, frameCopy
+
+    def forward(self):
+        frame = cv2.imread(self.input_source)  # NB: returns None if the image cannot be read
+        frameCopy = np.copy(frame)
+        inHeight, inWidth = self.window_size(frame)
+        net = cv2.dnn.readNetFromCaffe(self.protofile, self.weightfile)
+        t = time.time()
+        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0),
+                                        swapRB=False, crop=False)
+        net.setInput(inpBlob)
+        output = net.forward()
+        print("time taken by network : {:.3f}".format(time.time() - t))
+        frame, frameCopy = self.draw_skeleton(frame, frameCopy, output)
+        cv2.imshow('Output-Keypoints', frameCopy)
+        cv2.imshow('Output-Skeleton', frame)
+        cv2.imwrite('Output-Keypoints.jpg', frameCopy)
+        cv2.imwrite('Output-Skeleton.jpg', frame)
+        print("Total time taken : {:.3f}".format(time.time() - t))
+        cv2.waitKey(0)
+
+
+if __name__ == '__main__':
+    input_source = "right-frontal.jpg"
+    protoFile = "hand/pose_deploy.prototxt"
+    weightsFile = "hand/pose_iter_102000.caffemodel"
+    nPoints = 22
+    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
+                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+    gen = HandPose((protoFile, weightsFile), nPoints, POSE_PAIRS, input_source=input_source)
+    gen.forward()
diff --git a/HandPose/handPoseVideo.py b/HandPose/handPoseVideo.py
index fd09ae1c6..07d63486c 100644
--- a/HandPose/handPoseVideo.py
+++ b/HandPose/handPoseVideo.py
@@ -3,90 +3,107 @@
 import numpy as np
 
-protoFile = "hand/pose_deploy.prototxt"
-weightsFile = "hand/pose_iter_102000.caffemodel"
-nPoints = 22
-POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
-
-threshold = 0.2
-
-
-input_source = "asl.mp4"
-cap = cv2.VideoCapture(input_source)
-hasFrame, frame = cap.read()
-
-frameWidth = frame.shape[1]
-frameHeight = frame.shape[0]
-
-aspect_ratio = frameWidth/frameHeight
-
-inHeight = 368
-inWidth = int(((aspect_ratio*inHeight)*8)//8)
-
-vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0]))
-
-net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
-k = 0
-while 1:
-    k+=1
-    t = time.time()
-    hasFrame, frame = cap.read()
-    frameCopy = np.copy(frame)
-    if not hasFrame:
-        cv2.waitKey()
-        break
-
-    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
-                              (0, 0, 0), swapRB=False, crop=False)
-
-    net.setInput(inpBlob)
-
-    output = net.forward()
-
-    print("forward = {}".format(time.time() - t))
-
-    # Empty list to store the detected keypoints
-    points = []
-
-    for i in range(nPoints):
-        # confidence map of corresponding body's part.
-        probMap = output[0, i, :, :]
-        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
-
-        # Find global maxima of the probMap.
-        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
-
-        if prob > threshold :
-            cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
-            cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)
-
-            # Add the point to the list if the probability is greater than the threshold
-            points.append((int(point[0]), int(point[1])))
-        else :
-            points.append(None)
-
-    # Draw Skeleton
-    for pair in POSE_PAIRS:
-        partA = pair[0]
-        partB = pair[1]
-
-        if points[partA] and points[partB]:
-            cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
-            cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
-            cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
-
-    print("Time Taken for frame = {}".format(time.time() - t))
-
-    # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
-    # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
-    cv2.imshow('Output-Skeleton', frame)
-    # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame)
-    key = cv2.waitKey(1)
-    if key == 27:
-        break
-
-    print("total = {}".format(time.time() - t))
-
-    vid_writer.write(frame)
-
-vid_writer.release()
+class HandPose:
+    def __init__(self, model, nPoints, pose_pair, input_source):
+        self.input_source = input_source  # the input file name (or a camera index)
+        self.protofile, self.weightfile = model  # network definition and pretrained weights
+        self.nPoints = nPoints  # number of keypoints
+        self.POSE_PAIRS = pose_pair
+        self.threshold = 0.2
+
+    def window_size(self, frame):
+        # derive the network input dimensions from the frame dimensions returned by VideoCapture
+        frameWidth = frame.shape[1]
+        frameHeight = frame.shape[0]
+        aspect_ratio = frameWidth / frameHeight
+        inHeight = 368
+        inWidth = int((aspect_ratio * inHeight) // 8) * 8  # round the width down to a multiple of 8
+
+        return inHeight, inWidth  # input dimensions
+
+    def draw_skeleton(self, frame, frameCopy, output):
+        # take the video frame and the output generated by the model, mark the keypoints
+        # on frameCopy and draw the skeleton on frame using the pairs in self.POSE_PAIRS
+        points = []
+        for i in range(self.nPoints):
+            # confidence map of the corresponding keypoint
+            probMap = output[0, i, :, :]  # extract the probability map for the ith keypoint
+            probMap = cv2.resize(probMap, (frame.shape[1], frame.shape[0]))
+            # Find the global maximum of the probMap.
+            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
+
+            if prob > self.threshold:
+                cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1,
+                           lineType=cv2.FILLED)
+                cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8,
+                            (0, 0, 255), 2, lineType=cv2.LINE_AA)
+                # Add the point to the list if the probability is greater than the threshold
+                points.append((int(point[0]), int(point[1])))
+            else:
+                points.append(None)
+
+        for pair in self.POSE_PAIRS:
+            partA = pair[0]
+            partB = pair[1]
+            if points[partA] and points[partB]:
+                # draw the line connecting the points of the pair, then the circles
+                cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
+                cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+                cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+
+        return frame, frameCopy
+
+    def forward(self):
+        cap = cv2.VideoCapture(self.input_source)
+        hasFrame, frame = cap.read()  # read one frame up front to get the frame dimensions
+
+        # derive the network input dimensions
+        inHeight, inWidth = self.window_size(frame)
+        # load the Caffe model
+        net = cv2.dnn.readNetFromCaffe(self.protofile, self.weightfile)
+        vid_writer = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15,
+                                     (frame.shape[1], frame.shape[0]))
+        k = 0  # frame counter (currently unused)
+
+        while True:
+            k = k + 1
+            t = time.time()
+            hasFrame, frame = cap.read()
+
+            if not hasFrame:
+                cv2.waitKey()
+                break
+
+            frameCopy = np.copy(frame)
+
+            inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0),
+                                            swapRB=False, crop=False)
+            net.setInput(inpBlob)
+            # forward propagation
+            output = net.forward()
+            print("forward = {}".format(time.time() - t))
+            frame, frameCopy = self.draw_skeleton(frame, frameCopy, output)
+            print("Time Taken for frame = {}".format(time.time() - t))
+            # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
+            # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
+            cv2.imshow('Output-Skeleton', frame)
+            key = cv2.waitKey(1)
+            if key == 27:
+                break
+
+            print("total = {}".format(time.time() - t))
+            vid_writer.write(frame)
+
+        vid_writer.release()
+
+
+if __name__ == '__main__':
+    input_source = "asl.mp4"
+    # set input_source = 0 to read from the local camera instead
+    protoFile = "hand/pose_deploy.prototxt"
+    weightsFile = "hand/pose_iter_102000.caffemodel"
+    nPoints = 22
+    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
+                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+    gen = HandPose((protoFile, weightsFile), nPoints, POSE_PAIRS, input_source=input_source)
+    gen.forward()
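Review note: after this change the two files carry a near-identical HandPose class, differing only in the threshold and the forward() body, so the class could eventually live in one shared module. A minimal usage sketch under that assumption, showing the live-camera case hinted at by the comment in __main__ (importing from handPoseVideo.py is this reviewer's assumption; the constructor signature and model paths are taken from the diff):

# Hypothetical usage sketch: assumes handPoseVideo.py is importable from the
# working directory and the Caffe model files exist under hand/ as in the diff.
from handPoseVideo import HandPose

protoFile = "hand/pose_deploy.prototxt"
weightsFile = "hand/pose_iter_102000.caffemodel"
nPoints = 22
POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8],
              [0, 9], [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15],
              [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

# Passing a camera index instead of a file name makes cv2.VideoCapture read
# from the local camera; press Esc in the output window to stop.
gen = HandPose((protoFile, weightsFile), nPoints, POSE_PAIRS, input_source=0)
gen.forward()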