-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdetection.py
More file actions
128 lines (85 loc) · 4.22 KB
/
detection.py
File metadata and controls
128 lines (85 loc) · 4.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# this file contains a transform and a detect function
# it is used inthe notebooks to detect a pedestrian in a given image
from helpers import sliding_window, image_pyramid, display_bounding_box
from torchvision import datasets, transforms, models
from imutils.object_detection import non_max_suppression
from torch import nn
import torch
import os
import numpy as np
def prediction_transforms(roi):
    '''
    Converts an input image into the tensor format expected by the network.
    params:
        roi: region of interest (PIL.Image) that gets transformed into a tensor
    return:
        roi: the roi as a normalized tensor
    '''
    # ImageNet channel statistics (mean, std) used by the pretrained backbone.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    pipeline = transforms.Compose([transforms.ToTensor(), normalize])
    return pipeline(roi)
def detect_pedestrian(image, model, verbose=True, slidingWindow_parameters=None,
                      imagePyramid_parameters=(224, 224), scalefactor=1.5):
    '''
    Detects pedestrians in a given input image.
    params:
        image: PIL input image in which pedestrians should be detected
        model: pytorch model used for prediction
        verbose: Bool to print out the index of each roi in which a pedestrian was detected
        slidingWindow_parameters: Dictionary that contains information about the sliding
            window (height, width, step size in vertical and horizontal direction);
            defaults to {'height': 275, 'width': 100, 'step_w': 50, 'step_h': 50}
        imagePyramid_parameters: Tuple that contains the smallest size that the image
            should be scaled to
        scalefactor: factor by which the image shrinks at each pyramid level
    return:
        picks: bounding box coordinates from the prediction after non-maxima
            suppression; an empty (0, 4) array when no window could be extracted
    '''
    # None-sentinel instead of a mutable default dict: a dict default would be
    # shared (and mutable) across all calls of this function.
    if slidingWindow_parameters is None:
        slidingWindow_parameters = {'height': 275, 'width': 100,
                                    'step_w': 50, 'step_h': 50}
    listOfBoundingBoxes = []
    listOfProbs = []
    locs = []   # window coordinates rescaled to the original image
    rois = []   # preprocessed window tensors, one per sliding-window position
    imgWidth, imgHeight = image.size
    # initialize image pyramid
    imgPyramide = image_pyramid(image, minSize=imagePyramid_parameters,
                                scale=scalefactor)
    for img in imgPyramide:
        # get current scale to rescale roi coordinates back to the original image
        scale = imgWidth / float(img.size[0])
        # create sliding window generator; step sizes are divided by the scale,
        # clamped to >= 1 pixel so the generator always advances even at deep
        # pyramid levels where int(step / scale) would truncate to 0
        sliding = sliding_window(
            img,
            window_size=(slidingWindow_parameters['width'],
                         slidingWindow_parameters['height']),
            step_w=max(1, int(slidingWindow_parameters['step_w'] / scale)),
            step_h=max(1, int(slidingWindow_parameters['step_h'] / scale)))
        for windowBox, window in sliding:
            # rescale window coordinates to the original image size
            locs.append(tuple(int(x * scale) for x in windowBox))
            # prepare region of interest for input into the classifier
            roi = window.resize((224, int(224 * 2.5)))
            rois.append(prediction_transforms(roi))
    # guard: no windows produced (e.g. image smaller than the window) --
    # torch.stack on an empty list would raise; report "no detections" instead
    if not rois:
        return np.empty((0, 4), dtype=int)
    # classify the rois
    model.eval()
    with torch.no_grad():
        rois = torch.stack(rois, dim=0)
        # split rois into batches to prevent memory overload
        predLoader = torch.utils.data.DataLoader(rois, batch_size=8)
        sm = torch.nn.Softmax(dim=1)
        outputs = [model(inputs) for inputs in predLoader]
        outputs = torch.cat(outputs, dim=0)
        _, preds = torch.max(outputs, 1)
        probs = sm(outputs)
    # get list of indexes that contain a pedestrian (predicted class != 0)
    indexes = preds.numpy().nonzero()
    for index in indexes[0]:
        if verbose:
            print(f"Detected pedestrian at index {index}.")
        listOfBoundingBoxes.append(locs[index])
        listOfProbs.append(probs[index][1])
    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array(listOfBoundingBoxes)
    picks = non_max_suppression(rects, probs=listOfProbs, overlapThresh=0.2)
    return picks