-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdetection.py
More file actions
128 lines (85 loc) · 4.22 KB
/
detection.py
File metadata and controls
128 lines (85 loc) · 4.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# this file contains a transform and a detect function
# it is used inthe notebooks to detect a pedestrian in a given image
from helpers import sliding_window, image_pyramid, display_bounding_box
from torchvision import datasets, transforms, models
from imutils.object_detection import non_max_suppression
from torch import nn
import torch
import os
import numpy as np
def prediction_transforms(roi):
    '''
    Converts an input image into the tensor format expected by the network.
    params:
        roi: region of interest (PIL.Image) that gets transformed into a tensor
    return:
        roi: the roi as a normalized tensor
    '''
    # ImageNet channel statistics (mean, std) used by the pretrained backbone.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    pipeline = transforms.Compose([transforms.ToTensor(), normalize])
    return pipeline(roi)
def detect_pedestrian(image, model, verbose=True, slidingWindow_parameters=None,
                      imagePyramid_parameters=(224, 224), scalefactor=1.5):
    '''
    Detects pedestrians in a given input image.
    params:
        image: PIL input image in which pedestrians should be detected
        model: pytorch model used for prediction
        verbose: Bool to print out the index of each roi in which a pedestrian was detected
        slidingWindow_parameters: Dictionary that contains information about the sliding
            window (height, width, step size in vertical and horizontal direction);
            defaults to {'height': 275, 'width': 100, 'step_w': 50, 'step_h': 50}
        imagePyramid_parameters: Tuple that contains the smallest size that the image
            should be scaled to
        scalefactor: factor by which the image shrinks at each pyramid level
    return:
        picks: bounding box coordinates from the prediction after non-maxima
            suppression; an empty (0, 4) array when no window could be extracted
    '''
    # None-sentinel instead of a mutable default dict: a dict default would be
    # shared (and mutable) across all calls of this function.
    if slidingWindow_parameters is None:
        slidingWindow_parameters = {'height': 275, 'width': 100,
                                    'step_w': 50, 'step_h': 50}
    listOfBoundingBoxes = []
    listOfProbs = []
    locs = []   # window coordinates rescaled to the original image
    rois = []   # preprocessed window tensors, one per sliding-window position
    imgWidth, imgHeight = image.size
    # initialize image pyramid
    imgPyramide = image_pyramid(image, minSize=imagePyramid_parameters,
                                scale=scalefactor)
    for img in imgPyramide:
        # get current scale to rescale roi coordinates back to the original image
        scale = imgWidth / float(img.size[0])
        # create sliding window generator; step sizes are divided by the scale,
        # clamped to >= 1 pixel so the generator always advances even at deep
        # pyramid levels where int(step / scale) would truncate to 0
        sliding = sliding_window(
            img,
            window_size=(slidingWindow_parameters['width'],
                         slidingWindow_parameters['height']),
            step_w=max(1, int(slidingWindow_parameters['step_w'] / scale)),
            step_h=max(1, int(slidingWindow_parameters['step_h'] / scale)))
        for windowBox, window in sliding:
            # rescale window coordinates to the original image size
            locs.append(tuple(int(x * scale) for x in windowBox))
            # prepare region of interest for input into the classifier
            roi = window.resize((224, int(224 * 2.5)))
            rois.append(prediction_transforms(roi))
    # guard: no windows produced (e.g. image smaller than the window) --
    # torch.stack on an empty list would raise; report "no detections" instead
    if not rois:
        return np.empty((0, 4), dtype=int)
    # classify the rois
    model.eval()
    with torch.no_grad():
        rois = torch.stack(rois, dim=0)
        # split rois into batches to prevent memory overload
        predLoader = torch.utils.data.DataLoader(rois, batch_size=8)
        sm = torch.nn.Softmax(dim=1)
        outputs = [model(inputs) for inputs in predLoader]
        outputs = torch.cat(outputs, dim=0)
        _, preds = torch.max(outputs, 1)
        probs = sm(outputs)
    # get list of indexes that contain a pedestrian (predicted class != 0)
    indexes = preds.numpy().nonzero()
    for index in indexes[0]:
        if verbose:
            print(f"Detected pedestrian at index {index}.")
        listOfBoundingBoxes.append(locs[index])
        listOfProbs.append(probs[index][1])
    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array(listOfBoundingBoxes)
    picks = non_max_suppression(rects, probs=listOfProbs, overlapThresh=0.2)
    return picks