-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpose.py
More file actions
135 lines (121 loc) · 5.35 KB
/
pose.py
File metadata and controls
135 lines (121 loc) · 5.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import cv2
import numpy as np
import pandas as pd
from scipy import signal
# Pairs of keypoint indices that form the limbs ("bones") of the skeleton;
# each [a, b] entry is rendered as a line between joint a and joint b in
# drawSkeleton. NOTE(review): the indices appear to follow the 15-keypoint
# MPI body layout of the Caffe model loaded in analyzeKeyPoints — confirm
# against the model's prototxt.
skeletonPairs = [[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13]]
'''
Analyze Keypoints Function
Uses Video Input to Return a List of Keypoints for Each Frame
'''
def analyzeKeyPoints(path,
                     protoFile="/Users/rishipandey125/Desktop/code/pose_estimation_model/pose_deploy_linevec_faster_4_stages.prototxt.txt",
                     weightsFile="/Users/rishipandey125/Desktop/code/pose_estimation_model/pose_iter_160000.caffemodel"):
    """Run 15-keypoint pose estimation on every frame of a video.

    Args:
        path: Path to the input video file.
        protoFile: Caffe prototxt describing the pose network. Defaults to
            the original author's local path; pass your own to generalize.
        weightsFile: Caffe weights for the pose network (same note).

    Returns:
        A list with one entry per frame; each entry is a flat list of
        30 ints: [x0..x14, y0..y14] in input-image pixel coordinates.
    """
    network = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    baseVideo = cv2.VideoCapture(path)
    hasFrame, frame = baseVideo.read()
    # (left, right) joint-index pairs the model sometimes mirrors; the right
    # member of each pair is checked against the left once both are known.
    correspondingJoints = [[2, 5], [3, 6], [4, 7], [8, 11], [9, 12], [10, 13]]
    numKeyPoints = 15
    inWidth, inHeight = 368, 368  # network input resolution
    keyPoints = []
    previous_x, previous_y = [], []
    while hasFrame:
        imgHeight, imgWidth = frame.shape[0], frame.shape[1]
        # Prep the input image for the network.
        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        network.setInput(inpBlob)
        output = network.forward()
        # Heatmap resolution of the network output.
        height, width = output.shape[2], output.shape[3]
        corresponding_index = 0
        x_keyPoints, y_keyPoints = [], []
        for i in range(numKeyPoints):
            confidenceMap = output[0, i, :, :]
            # Only the max probability and its location are used.
            _minVal, prob, _minLoc, point = cv2.minMaxLoc(confidenceMap)
            # Accept even low-confidence detections on the very first frame so
            # the previous-frame fallback below always has data to reuse.
            if prob > 0.1 or len(keyPoints) == 0:
                # Scale heatmap coordinates back to image pixels.
                x_keyPoints.append(int((imgWidth * point[0]) / width))
                y_keyPoints.append(int((imgHeight * point[1]) / height))
            else:
                # Low confidence: carry over the location from the last frame.
                x_keyPoints.append(previous_x[i])
                y_keyPoints.append(previous_y[i])
            # Left/right consistency check. Runs on BOTH branches so
            # corresponding_index stays in sync even when a joint fell back to
            # its previous location (the original only advanced it on the
            # high-confidence branch, desynchronizing later swap checks).
            if (corresponding_index < len(correspondingJoints)
                    and i == correspondingJoints[corresponding_index][1]):
                previousPoint = correspondingJoints[corresponding_index][0]
                corresponding_index += 1
                if x_keyPoints[previousPoint] > x_keyPoints[i]:
                    # Swap the mirrored pair. Bug fix: the original assigned
                    # x_keyPoints[previousPoint] into y_keyPoints[i],
                    # corrupting the y coordinate on every swap.
                    x_keyPoints[i], x_keyPoints[previousPoint] = \
                        x_keyPoints[previousPoint], x_keyPoints[i]
                    y_keyPoints[i], y_keyPoints[previousPoint] = \
                        y_keyPoints[previousPoint], y_keyPoints[i]
        # Remember this frame's points for the next iteration's fallback.
        previous_x, previous_y = x_keyPoints, y_keyPoints
        keyPoints.append(x_keyPoints + y_keyPoints)
        hasFrame, frame = baseVideo.read()
    baseVideo.release()  # bug fix: the capture was never released
    return keyPoints
'''
Helper function to Smooth Pose Estimation Data
Uses Savgol Smoothing
'''
def smoothData(data, window_length=15, polyorder=2):
    """Smooth per-frame keypoint tracks with a Savitzky-Golay filter.

    Args:
        data: List of rows, one per frame; each row is a flat list of
            coordinates (e.g. [x0..x14, y0..y14] from analyzeKeyPoints).
        window_length: Target filter window in frames (default 15, as in
            the original hard-coded call).
        polyorder: Polynomial order of the fit (default 2).

    Returns:
        A pandas DataFrame with one column per coordinate track, smoothed
        in place column by column.

    Robustness fix: savgol_filter raises when window_length exceeds the
    number of samples, so the window is clamped to the clip length and
    forced odd; clips too short to smooth are returned unchanged instead
    of crashing.
    """
    df = pd.DataFrame.from_records(data)
    numFrames = len(df)
    win = min(window_length, numFrames)
    if win % 2 == 0:
        win -= 1  # savgol_filter requires an odd window
    if win <= polyorder:
        return df  # too few frames to fit the polynomial — skip smoothing
    for col in df.columns:
        df[col] = signal.savgol_filter(df[col], win, polyorder)
    return df
'''
Draw Skeleton Function
Uses Video Path to Draw a Skeleton Over the Tracked Person
'''
def drawSkeleton(path, outPath='output.avi', outputFrameRate=24):
    """Draw the smoothed skeleton over the tracked person and write a video.

    Args:
        path: Path to the input video.
        outPath: Output video filename (default 'output.avi', as before).
        outputFrameRate: Frame rate for the output; should match the input
            video's rate (default 24, as before).

    Side effects:
        Writes a motion-JPEG .avi to outPath.
    """
    video = cv2.VideoCapture(path)
    hasFrame, frame = video.read()
    outputVideo = cv2.VideoWriter(outPath,
                                  cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                  outputFrameRate,
                                  (frame.shape[1], frame.shape[0]))
    # Pose data for every frame, smoothed.
    df = smoothData(analyzeKeyPoints(path))
    frameCount = 0
    # Stop at whichever runs out first: video frames or pose rows.
    while hasFrame and frameCount < len(df[0]):
        for point1, point2 in skeletonPairs:
            # Smoothed values are floats; cast back to pixel ints.
            # Columns 0-14 hold x, columns 15-29 the matching y.
            cord1 = tuple(np.array([df[point1][frameCount], df[point1 + 15][frameCount]], int))
            cord2 = tuple(np.array([df[point2][frameCount], df[point2 + 15][frameCount]], int))
            # Draw both joints, their index labels, and the connecting bone.
            cv2.circle(frame, cord1, 20, (255, 0, 0), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(frame, "{}".format(point1), cord1, cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 2, lineType=cv2.LINE_AA)
            cv2.circle(frame, cord2, 20, (255, 0, 0), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(frame, "{}".format(point2), cord2, cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 2, lineType=cv2.LINE_AA)
            cv2.line(frame, cord1, cord2, (0, 0, 255), 10)
        outputVideo.write(frame)
        frameCount += 1
        hasFrame, frame = video.read()
    video.release()  # bug fix: the input capture was never released
    outputVideo.release()
'''
Returns Organized Smoothed Pose Data as List of Tuples for each Frame
'''
def poseData(path):
    """Return the smoothed 2D pose as a list of per-frame joint positions.

    Args:
        path: Path to the input video.

    Returns:
        A list with one entry per frame; each entry is a list of 15
        [x, y] pairs, one per tracked joint.
    """
    smoothed = smoothData(analyzeKeyPoints(path))
    numKeyPoints = 15
    # Column j holds joint j's x track; column j + 15 holds its y track.
    return [
        [[smoothed[joint][frame], smoothed[joint + 15][frame]]
         for joint in range(numKeyPoints)]
        for frame in range(len(smoothed))
    ]