-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathharris_corner_detection_v5_driver.py
More file actions
executable file
·292 lines (213 loc) · 13.9 KB
/
harris_corner_detection_v5_driver.py
File metadata and controls
executable file
·292 lines (213 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
from __future__ import division
import pyopencl as cl
import numpy as np
import Image
from PIL import Image
from skimage import color
import pylab
from scipy.ndimage import filters
from mpl_toolkits.mplot3d import axes3d
import matplotlib
import matplotlib.pyplot as plt
import math
from scipy import linalg
import sys
# Call functions from serial implementation code, we use these functions for time comparisons
# and for displaying the output of our openCL GPU code
from harris import Timer, compute_harris_response, run_harris, plot_harris_points, round_up
from harris import check_dim, get_harris_points, plot_harris_points, generate_weights
import time
if __name__ == '__main__':
''' Import the image and the number of iterations'''
if len(sys.argv) > 2:
image = str(sys.argv[1])
num_runs = int(sys.argv[2])
print
print "==================================================="
print "Running Harris Corner detection for", image
print "For the following number of iterations for profiling:", num_runs
else:
image = '1.tif'
num_runs = 100
print 'Default image:', image
print 'Default number of iterations for profiling:', num_runs
#Define the number of runs to get average of run times
output_times_openCL = np.zeros(num_runs)
output_times_serial = np.zeros(num_runs)
#Initalize loop to get average of times
for i in range(num_runs):
# List our platforms
platforms = cl.get_platforms()
# Create a context with all the devices
devices = platforms[0].get_devices()
context = cl.Context(devices)
# Create a queue for transferring data and launching computations.
# Turn on profiling to allow us to check event times.
queue = cl.CommandQueue(context, context.devices[0],
properties=cl.command_queue_properties.PROFILING_ENABLE)
program = cl.Program(context, open('harris_corner_detection_v5.cl').read()).build(options='')
#Load in image to be analyzed
host_image = np.array(Image.open(image).convert('L')).astype(np.float32)[::1, ::1].copy()
#start time after image load for consistancy
start = time.time()
sigma = 1 #Define the standard deviation for the gauussian
#Generate the 1D first dimensional gaussian kernel
filter_kernel_derivative = np.asarray(generate_weights(sigma), order = 1).astype(np.float32)
#Generate the 1D zero derivative gaussian kernel
filter_kernel_zero = np.asarray(generate_weights(sigma, order = 0)).astype(np.float32)
#Determine the length of the entire weight vector based on the sigma of the gaussian
weight_length = len(filter_kernel_derivative) #should be 9 with sigma = 1
#This is the number of neighbors for each analyzed pixel, should be even number
window = (weight_length - 1) #window is 8
#the halo is the number of nieghbors on each side of the analyzed pixelsd
halo = np.int32(window / 2.)
host_image_filtered = np.zeros_like(host_image)
gpu_image_in = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
zero_derivative_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
first_derivative_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
derivative_kernel_x = cl.Buffer(context, cl.mem_flags.READ_WRITE, filter_kernel_derivative.size * 4)
zero_kernel = cl.Buffer(context, cl.mem_flags.READ_WRITE, filter_kernel_zero.size * 4)
Harris_Matrix = np.zeros_like(host_image)
# Intermediate storage area, between Derivative of Gaussian and Gaussian Filter
local_size = (int(halo), int(halo)) # 2D local_size
global_size = tuple([round_up(g, l) for g, l in zip(host_image.shape[::-1], local_size)]) # shape
width = np.int32(host_image.shape[1])
height = np.int32(host_image.shape[0])
local_memory = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_buffer_zero_1 = cl.LocalMemory(4 * (np.shape(filter_kernel_zero)[0]))
local_buffer_first_1 = cl.LocalMemory(4 * (np.shape(filter_kernel_derivative)[0]))
buf_width = np.int32(local_size[0] + window)
buf_height = np.int32(local_size[1] + window)
cl.enqueue_copy(queue, gpu_image_in, host_image, is_blocking=False)
cl.enqueue_copy(queue, derivative_kernel_x, filter_kernel_derivative, is_blocking=False)
cl.enqueue_copy(queue, zero_kernel, filter_kernel_zero, is_blocking=False)
########################################### First Kernel ##################################
# This Kernel takes the first derivative of a guasisan #
# of the image in the y-direction (axis = 0) and zero Derivatives #
# of a gaussian in the y-direction (axis = 0) #
########################################### First Kernel ##################################
#Execute Derivative of Gaussian Function
program.gaussian_first_axis(
queue, global_size, local_size,
gpu_image_in,
zero_derivative_out,
first_derivative_out,
local_memory, width,
height, buf_width, buf_height, halo,
derivative_kernel_x, zero_kernel,
local_buffer_first_1, local_buffer_zero_1
)
########################################### Second Kernel ##################################
# This Kernel takes the first derivative of a guasisan #
# of the image in the x-direction (axis = 1) and zero Derivatives #
# of a gaussian in the x-direction (axis = 1) #
########################################### Second Kernel ##################################
#allocate local memory buffers for the two filters used in the second kernel
local_memory_axis2_1 = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_memory_axis2_2 = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_buffer_zero_2 = cl.LocalMemory(4 * (np.shape(filter_kernel_zero)[0]))
local_buffer_first_2 = cl.LocalMemory(4 * (np.shape(filter_kernel_derivative)[0]))
#allocate memory for the output of the second kernel
gpu_image_Wxx_derivative_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
gpu_image_Wyy_derivative_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
gpu_image_Wxy_derivative_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
#Execute Derivative of Gaussian Function
program.gaussian_second_axis(
queue, global_size, local_size,
zero_derivative_out,
first_derivative_out,
gpu_image_Wxx_derivative_out,
gpu_image_Wyy_derivative_out,
gpu_image_Wxy_derivative_out,
local_memory_axis2_1, local_memory_axis2_2,
width,
height, buf_width, buf_height, halo,
derivative_kernel_x, zero_kernel,
local_buffer_first_2, local_buffer_zero_2
)
########################################### Third Kernel ###################################
# This Kernel applies a gaussian to the product of the #
# parital derivatives in the y-direction (axis = 0) #
# #
########################################### Third Kernel ###################################
#load in the local memory buffer allocation for all the compents fo the harris matrix
local_memory_filter_Wxx = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_memory_filter_Wyy = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_memory_filter_Wxy = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_buffer_zero_3 = cl.LocalMemory(4 * (np.shape(filter_kernel_zero)[0]))
#allocate memory for the output of the third kernel
gpu_image_Wxx_third_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
gpu_image_Wyy_third_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
gpu_image_Wxy_third_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
# Execute gaussian filter on all component matrices and calculate Harris Matrix
program.filter_first_axis_second_pass(
queue, global_size, local_size,
gpu_image_Wxx_derivative_out,
gpu_image_Wyy_derivative_out,
gpu_image_Wxy_derivative_out,
gpu_image_Wxx_third_out,
gpu_image_Wyy_third_out,
gpu_image_Wxy_third_out,
local_memory_filter_Wxx,
local_memory_filter_Wyy,
local_memory_filter_Wxy,
halo, width, height, buf_width, buf_height,
zero_kernel, local_buffer_zero_3
)
########################################### Fourth Kernel ##################################
# This Kernel applies a gaussian to the product of the #
# parital derivatives in the x-direction (axis = 1) #
# and computes the final Harris Matrix for the output #
########################################### Fourth Kernel ##################################
#Allocate local memory buffer for fourth kernel
local_memory_filter_Wxx_2 = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_memory_filter_Wyy_2 = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_memory_filter_Wxy_2 = cl.LocalMemory(4 * ((local_size[0] + (halo * 2)) * (local_size[1] + (halo * 2))))
local_buffer_zero_4 = cl.LocalMemory(4 * (np.shape(filter_kernel_zero)[0]))
# Allocate memory to store output from fourth kernel, this is the final Harris Matrix
gpu_image_filter_out = cl.Buffer(context, cl.mem_flags.READ_WRITE, host_image.size * 4)
program.filter_second_axis_second_pass(
queue, global_size, local_size,
gpu_image_Wxx_third_out,
gpu_image_Wyy_third_out,
gpu_image_Wxy_third_out,
gpu_image_filter_out,
local_memory_filter_Wxx_2,
local_memory_filter_Wyy_2,
local_memory_filter_Wxy_2,
halo, width, height, buf_width, buf_height,
zero_kernel, local_buffer_zero_4
)
#Output the final Harris Matrix to the CPU
cl.enqueue_copy(queue, Harris_Matrix, gpu_image_filter_out, is_blocking=False)
points = get_harris_points(Harris_Matrix)
end = time.time()
#Store the time to run the entire openCL version
output_times_openCL[i] = end - start
#Store the time to run the entire serial version
with Timer() as serial_time:
harris = run_harris(host_image)
output_times_serial[i] = serial_time.interval
######################################################################
# test comparision for accuracy vs. harris.py Serial implementation by
# "Programming Computer Vision with Python" by Jan Erik Solem
######################################################################
print '-------------Check Plots: Saved to the Directory--------------------------'
plot_harris_points(host_image, points, im_name = 'Harris openCL Image')
response = compute_harris_response(host_image, sigma=1)
serial_points = get_harris_points(response, min_dist=10, threshold=0.1)
plot_harris_points(host_image, serial_points, im_name = 'Harris Serial Image')
print '--------------------------------------------------------------------------'
print '-------------Check For Correctness----------------------------------------'
pt_x = np.random.randint(np.shape(host_image)[0])
pt_y = np.random.randint(np.shape(host_image)[1])
print 'openCL Harris Matrix Random Point Check:', Harris_Matrix[pt_x, pt_y]
print 'Serial Baseline Harris Matrix Random Point Check:', response[pt_x, pt_y]
print 'Number of openCL points:', np.shape(points)
print 'Number of Serial Points:', np.shape(serial_points)
print 'Are the two lists of corner points the same?', (np.array(serial_points) == np.array(points)).all()
print '--------------------------------------------------------------------------'
print '-------------Check Timing Comparision-------------------------------------'
print 'Time to run openCL', output_times_openCL.mean()
print 'Time to run Serial', output_times_serial.mean()
print '--------------------------------------------------------------------------'