@@ -2,7 +2,7 @@ function max_pooling2d_fwd!(x::AbstractArray{T,4}, y::AbstractArray{T,4},
2
2
width:: Int , height:: Int , channels:: Int , num:: Int , pooled_width:: Int ,
3
3
pooled_height:: Int , kernel_w:: Int , kernel_h:: Int , pad_w:: Int , pad_h:: Int ,
4
4
stride_w:: Int , stride_h:: Int ) where T
5
- for n = 1 : num, c = 1 : channels, ph = 1 : pooled_height, pw = 1 : pooled_width
5
+ @inbounds for n = 1 : num, c = 1 : channels, ph = 1 : pooled_height, pw = 1 : pooled_width
6
6
hstart = (ph - 1 )* stride_h - pad_h
7
7
wstart = (pw - 1 )* stride_w - pad_w
8
8
hend = min (hstart + kernel_h, height)
@@ -11,7 +11,11 @@ function max_pooling2d_fwd!(x::AbstractArray{T,4}, y::AbstractArray{T,4},
11
11
hstart = max (hstart, 0 ) + 1
12
12
wstart = max (wstart, 0 ) + 1
13
13
14
- y[pw, ph, c, n] = maximum (x[wstart: wend, hstart: hend, c, n])
14
+ m = typemin (T)  # smallest value of T, so it never wins against a window element
15
+ for j in hstart: hend, i in wstart: wend
16
+ m = max (m, x[i, j, c, n])  # fold into the running maximum; one-argument max never compares against m
17
+ end
18
+ y[pw, ph, c, n] = m
15
19
end
16
20
end
17
21
@@ -69,7 +73,7 @@ function mean_pooling2d_fwd!(x::AbstractArray{T,4}, y::AbstractArray{T,4},
69
73
pooled_height:: Int , kernel_w:: Int , kernel_h:: Int ,pad_w:: Int , pad_h:: Int ,
70
74
stride_w:: Int , stride_h:: Int ) where T
71
75
kernel_size = kernel_w * kernel_h
72
- for n = 1 : num, c = 1 : channels, ph = 1 : pooled_height, pw = 1 : pooled_width
76
+ @inbounds for n = 1 : num, c = 1 : channels, ph = 1 : pooled_height, pw = 1 : pooled_width
73
77
hstart = (ph - 1 ) * stride_h - pad_h
74
78
wstart = (pw - 1 ) * stride_w - pad_w
75
79
hend = min (hstart + kernel_h, height)
@@ -78,7 +82,11 @@ function mean_pooling2d_fwd!(x::AbstractArray{T,4}, y::AbstractArray{T,4},
78
82
hstart = max (hstart, 0 ) + 1
79
83
wstart = max (wstart, 0 ) + 1
80
84
81
- y[pw, ph, c, n] = sum (x[wstart: wend, hstart: hend, c, n]) / kernel_size
85
+ s = zero (T)  # window sum accumulator, typed like the array elements
86
+ for j in hstart: hend, i in wstart: wend  # i (first index of x) varies fastest
87
+ s += x[i, j, c, n]
88
+ end
89
+ y[pw, ph, c, n] = s / kernel_size  # divisor is kernel_w * kernel_h even when the window was clipped at the border
82
90
end
83
91
end
84
92
@@ -132,7 +140,7 @@ function max_pooling3d_fwd!(x::AbstractArray{T,5}, y::AbstractArray{T,5},
132
140
width:: Int , height:: Int , depth:: Int , channels:: Int , num:: Int , pooled_width:: Int ,
133
141
pooled_height:: Int , pooled_depth:: Int , kernel_w:: Int , kernel_h:: Int , kernel_d:: Int ,
134
142
pad_w:: Int , pad_h:: Int , pad_d:: Int , stride_w:: Int , stride_h:: Int , stride_d:: Int ) where T
135
- for n = 1 : num, c = 1 : channels, pd = 1 : pooled_depth, ph = 1 : pooled_height, pw = 1 : pooled_width
143
+ @inbounds for n = 1 : num, c = 1 : channels, pd = 1 : pooled_depth, ph = 1 : pooled_height, pw = 1 : pooled_width
136
144
dstart = (pd - 1 )* stride_d - pad_d
137
145
hstart = (ph - 1 )* stride_h - pad_h
138
146
wstart = (pw - 1 )* stride_w - pad_w
@@ -145,8 +153,11 @@ function max_pooling3d_fwd!(x::AbstractArray{T,5}, y::AbstractArray{T,5},
145
153
hstart = max (hstart, 0 ) + 1
146
154
wstart = max (wstart, 0 ) + 1
147
155
148
- y[pw, ph, pd, c, n] =
149
- maximum (x[wstart: wend, hstart: hend, dstart: dend, c, n])
156
+ m = typemin (T)  # smallest value of T, so it never wins against a window element
157
+ for k in dstart: dend, j in hstart: hend, i in wstart: wend
158
+ m = max (m, x[i, j, k, c, n])  # fold into the running maximum; one-argument max never compares against m
159
+ end
160
+ y[pw, ph, pd, c, n] = m
150
161
end
151
162
end
152
163
@@ -213,7 +224,7 @@ function mean_pooling3d_fwd!(x::AbstractArray{T,5}, y::AbstractArray{T,5},
213
224
214
225
kernel_size = kernel_w * kernel_h * kernel_d
215
226
# pragma omp parallel for
216
- for n = 1 : num, c = 1 : channels, pd = 1 : pooled_depth, ph = 1 : pooled_height, pw = 1 : pooled_width
227
+ @inbounds for n = 1 : num, c = 1 : channels, pd = 1 : pooled_depth, ph = 1 : pooled_height, pw = 1 : pooled_width
217
228
dstart = (pd - 1 ) * stride_d - pad_d
218
229
hstart = (ph - 1 ) * stride_h - pad_h
219
230
wstart = (pw - 1 ) * stride_w - pad_w
@@ -226,8 +237,11 @@ function mean_pooling3d_fwd!(x::AbstractArray{T,5}, y::AbstractArray{T,5},
226
237
hstart = max (hstart, 0 ) + 1
227
238
wstart = max (wstart, 0 ) + 1
228
239
229
- y[pw, ph, pd, c, n] =
230
- sum (x[wstart: wend, hstart: hend, dstart: dend, c, n]) / kernel_size
240
+ s = zero (T)  # window sum accumulator, typed like the array elements
241
+ for k in dstart: dend, j in hstart: hend, i in wstart: wend  # i (first index of x) varies fastest
242
+ s += x[i, j, k, c, n]
243
+ end
244
+ y[pw, ph, pd, c, n] = s / kernel_size  # divisor is kernel_w * kernel_h * kernel_d even when the window was clipped
231
245
end
232
246
end
233
247
0 commit comments