Skip to content

Commit b1e5699

Browse files
authored
Merge pull request #15901 from heavengate/release/1.3
cherry-pick pool/adaptive_pool/yolov3_loss doc fix
2 parents 4b3f9e5 + 64e3356 commit b1e5699

File tree

4 files changed

+201
-100
lines changed

4 files changed

+201
-100
lines changed

paddle/fluid/operators/detection/yolov3_loss_op.cc

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -144,34 +144,40 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
144144
"The ignore threshold to ignore confidence loss.")
145145
.SetDefault(0.7);
146146
AddComment(R"DOC(
147-
This operator generate yolov3 loss by given predict result and ground
147+
This operator generates yolov3 loss based on given predict result and ground
148148
truth boxes.
149149
150150
The output of previous network is in shape [N, C, H, W], while H and W
151-
should be the same, specify the grid size, each grid point predict given
152-
number boxes, this given number is specified by anchors, it should be
153-
half anchors length, which following will be represented as S. In the
154-
second dimention(the channel dimention), C should be S * (class_num + 5),
155-
class_num is the box categoriy number of source dataset(such as coco),
156-
so in the second dimention, stores 4 box location coordinates x, y, w, h
157-
and confidence score of the box and class one-hot key of each anchor box.
151+
should be the same. H and W specify the grid size; each grid point predicts
152+
a given number of boxes; this number, represented as S in the following,
153+
is specified by the number of anchors. In the second dimension (the channel
154+
dimension), C should be equal to S * (class_num + 5), class_num is the object
155+
category number of source dataset(such as 80 in coco dataset), so in the
156+
second(channel) dimension, apart from 4 box location coordinates x, y, w, h,
157+
also includes confidence score of the box and class one-hot key of each anchor box.
158158
159-
While the 4 location coordinates if $$tx, ty, tw, th$$, the box predictions
160-
correspnd to:
159+
Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box predictions
160+
should be as follows:
161161
162162
$$
163-
b_x = \sigma(t_x) + c_x
164-
b_y = \sigma(t_y) + c_y
163+
b_x = \\sigma(t_x) + c_x
164+
$$
165+
$$
166+
b_y = \\sigma(t_y) + c_y
167+
$$
168+
$$
165169
b_w = p_w e^{t_w}
170+
$$
171+
$$
166172
b_h = p_h e^{t_h}
167173
$$
168174
169-
While $$c_x, c_y$$ is the left top corner of current grid and $$p_w, p_h$$
170-
is specified by anchors.
175+
In the equation above, :math:`c_x, c_y` is the left top corner of current grid
176+
and :math:`p_w, p_h` is specified by anchors.
171177
172178
As for confidence score, it is the logistic regression value of IoU between
173179
anchor boxes and ground truth boxes, the score of the anchor box which has
174-
the max IoU should be 1, and if the anchor box has IoU bigger then ignore
180+
the max IoU should be 1, and if the anchor box has IoU bigger than ignore
175181
thresh, the confidence score loss of this anchor box will be ignored.
176182
177183
Therefore, the yolov3 loss consists of three major parts, box location loss,
@@ -186,13 +192,13 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
186192
187193
In order to trade off box coordinate losses between big boxes and small
188194
boxes, box coordinate losses will be multiplied by scale weight, which is
189-
calculated as follow.
195+
calculated as follows.
190196
191197
$$
192198
weight_{box} = 2.0 - t_w * t_h
193199
$$
194200
195-
Final loss will be represented as follow.
201+
Final loss will be represented as follows.
196202
197203
$$
198204
loss = (loss_{xy} + loss_{wh}) * weight_{box}

paddle/fluid/operators/pool_op.cc

Lines changed: 87 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,10 @@ void Pool2dOpMaker::Make() {
168168
"be ignored."); // TODO(Chengduo): Add checker.
169169
// (Currently,
170170
// TypedAttrChecker doesn't support vector type.)
171-
AddAttr<bool>("global_pooling",
172-
"(bool, default false) Whether to use the global pooling. "
173-
"If global_pooling = true, ksize and paddings will be ignored.")
171+
AddAttr<bool>(
172+
"global_pooling",
173+
"(bool, default false) Whether to use the global pooling. "
174+
"If global_pooling = true, kernel size and paddings will be ignored.")
174175
.SetDefault(false);
175176
AddAttr<std::vector<int>>("strides",
176177
"(vector<int>, default {1, 1}), strides(height, "
@@ -182,7 +183,7 @@ void Pool2dOpMaker::Make() {
182183
"paddings",
183184
"(vector<int>, default {0,0}), paddings(height, width) of pooling "
184185
"operator."
185-
"If global_pooling = true, paddings and ksize will be ignored.")
186+
"If global_pooling = true, paddings and kernel size will be ignored.")
186187
.SetDefault({0, 0});
187188
AddAttr<bool>(
188189
"exclusive",
@@ -204,7 +205,7 @@ void Pool2dOpMaker::Make() {
204205
.SetDefault(false);
205206
AddAttr<bool>(
206207
"ceil_mode",
207-
"(bool, default false) Wether to use the ceil function to calculate "
208+
"(bool, default false) Whether to use the ceil function to calculate "
208209
"output height and width. False is the default. If it is set to False, "
209210
"the floor function will be used.")
210211
.SetDefault(false);
@@ -259,31 +260,40 @@ The input(X) size and output(Out) size may be different.
259260
W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
260261
$$
261262
262-
For exclusive = true:
263+
For exclusive = false:
263264
$$
264265
hstart = i * strides[0] - paddings[0]
266+
$$
267+
$$
265268
hend = hstart + ksize[0]
269+
$$
270+
$$
266271
wstart = j * strides[1] - paddings[1]
272+
$$
273+
$$
267274
wend = wstart + ksize[1]
275+
$$
276+
$$
268277
Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{ksize[0] * ksize[1]}
269278
$$
270-
For exclusive = false:
279+
280+
For exclusive = true:
271281
$$
272282
hstart = max(0, i * strides[0] - paddings[0])
283+
$$
284+
$$
273285
hend = min(H, hstart + ksize[0])
286+
$$
287+
$$
274288
wstart = max(0, j * strides[1] - paddings[1])
289+
$$
290+
$$
275291
wend = min(W, wstart + ksize[1])
292+
$$
293+
$$
276294
Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
277295
$$
278296
279-
For adaptive = true:
280-
$$
281-
hstart = floor(i * H_{in} / H_{out})
282-
hend = ceil((i + 1) * H_{in} / H_{out})
283-
wstart = floor(j * W_{in} / W_{out})
284-
wend = ceil((j + 1) * W_{in} / W_{out})
285-
Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
286-
$$
287297
)DOC");
288298
}
289299

@@ -324,7 +334,7 @@ void Pool3dOpMaker::Make() {
324334
AddAttr<bool>(
325335
"global_pooling",
326336
"(bool, default false) Whether to use the global pooling. "
327-
"If global_pooling = true, ksize and paddings wille be ignored.")
337+
"If global_pooling = true, kernel size and paddings will be ignored.")
328338
.SetDefault(false);
329339
AddAttr<std::vector<int>>(
330340
"strides",
@@ -359,7 +369,7 @@ void Pool3dOpMaker::Make() {
359369
.SetDefault(false);
360370
AddAttr<bool>(
361371
"ceil_mode",
362-
"(bool, default false) Wether to use the ceil function to calculate "
372+
"(bool, default false) Whether to use the ceil function to calculate "
363373
"output height and width. False is the default. If it is set to False, "
364374
"the floor function will be used.")
365375
.SetDefault(false);
@@ -392,48 +402,68 @@ width, respectively. The input(X) size and output(Out) size may be different.
392402
Output:
393403
Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
394404
For ceil_mode = false:
395-
$$
396-
D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
397-
H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
398-
W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
399-
$$
405+
$$
406+
D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1
407+
$$
408+
$$
409+
H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
410+
$$
411+
$$
412+
W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
413+
$$
400414
For ceil_mode = true:
401-
$$
402-
D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 \\
403-
H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 \\
404-
W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1
405-
$$
406-
For exclusive = true:
407-
$$
408-
dstart = i * strides[0] - paddings[0]
409-
dend = dstart + ksize[0]
410-
hstart = j * strides[1] - paddings[1]
411-
hend = hstart + ksize[1]
412-
wstart = k * strides[2] - paddings[2]
413-
wend = wstart + ksize[2]
414-
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]}
415-
$$
415+
$$
416+
D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1
417+
$$
418+
$$
419+
H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1
420+
$$
421+
$$
422+
W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1
423+
$$
424+
416425
For exclusive = false:
417-
$$
418-
dstart = max(0, i * strides[0] - paddings[0])
419-
dend = min(D, dstart + ksize[0])
420-
hstart = max(0, j * strides[1] - paddings[1])
421-
hend = min(H, hstart + ksize[1])
422-
wstart = max(0, k * strides[2] - paddings[2])
423-
wend = min(W, wstart + ksize[2])
424-
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
425-
$$
426-
427-
For adaptive = true:
428-
$$
429-
dstart = floor(i * D_{in} / D_{out})
430-
dend = ceil((i + 1) * D_{in} / D_{out})
431-
hstart = floor(j * H_{in} / H_{out})
432-
hend = ceil((j + 1) * H_{in} / H_{out})
433-
wstart = floor(k * W_{in} / W_{out})
434-
wend = ceil((k + 1) * W_{in} / W_{out})
435-
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
436-
$$
426+
$$
427+
dstart = i * strides[0] - paddings[0]
428+
$$
429+
$$
430+
dend = dstart + ksize[0]
431+
$$
432+
$$
433+
hstart = j * strides[1] - paddings[1]
434+
$$
435+
$$
436+
hend = hstart + ksize[1]
437+
$$
438+
$$
439+
wstart = k * strides[2] - paddings[2]
440+
$$
441+
$$
442+
wend = wstart + ksize[2]
443+
$$
444+
$$
445+
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]}
446+
$$
447+
448+
For exclusive = true:
449+
$$
450+
dstart = max(0, i * strides[0] - paddings[0])
451+
$$
452+
$$
453+
dend = min(D, dstart + ksize[0])
454+
$$
455+
$$
456+
hstart = max(0, j * strides[1] - paddings[1])
$$
$$
hend = min(H, hstart + ksize[1])
457+
$$
458+
$$
459+
wstart = max(0, k * strides[2] - paddings[2])
460+
$$
461+
$$
462+
wend = min(W, wstart + ksize[2])
463+
$$
464+
$$
465+
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
466+
$$
437467
438468
)DOC");
439469
}

python/paddle/fluid/layers/detection.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -545,15 +545,16 @@ def yolov3_loss(x,
545545
TypeError: Attr ignore_thresh of yolov3_loss must be a float number
546546
547547
Examples:
548-
.. code-block:: python
549-
550-
x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
551-
gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32')
552-
gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32')
553-
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
554-
anchors = [0, 1, 2]
555-
loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80, anchors=anchors,
556-
ignore_thresh=0.5, downsample_ratio=32)
548+
.. code-block:: python
549+
550+
x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
551+
gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32')
552+
gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32')
553+
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
554+
anchor_mask = [0, 1, 2]
555+
loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, anchors=anchors,
556+
anchor_mask=anchor_mask, class_num=80,
557+
ignore_thresh=0.7, downsample_ratio=32)
557558
"""
558559
helper = LayerHelper('yolov3_loss', **locals())
559560

0 commit comments

Comments
 (0)