Skip to content

Commit cb27a92

Browse files
authored
Merge pull request #13971 from sefira/FasterOpDoc
generate proposal labels doc
2 parents 3c957af + 6c1d74b commit cb27a92

File tree

2 files changed

+111
-25
lines changed

2 files changed

+111
-25
lines changed

paddle/fluid/operators/detection/generate_proposal_labels_op.cc

Lines changed: 80 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -439,31 +439,88 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
439439
class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
440440
public:
441441
void Make() override {
442-
// TODO(buxingyuan): Add Document
443-
AddInput("RpnRois", "RpnRois.");
444-
AddInput("GtClasses", "GtClasses.");
445-
AddInput("IsCrowd", "IsCrowd.");
446-
AddInput("GtBoxes", "GtBoxes.");
447-
AddInput("ImInfo", "ImInfo.");
448-
449-
AddOutput("Rois", "Rois.");
450-
AddOutput("LabelsInt32", "LabelsInt32.");
451-
AddOutput("BboxTargets", "BboxTargets.");
452-
AddOutput("BboxInsideWeights", "BboxInsideWeights.");
453-
AddOutput("BboxOutsideWeights", "BboxOutsideWeights.");
454-
455-
AddAttr<int>("batch_size_per_im", "batch_size_per_im");
456-
AddAttr<float>("fg_fraction", "fg_fraction");
457-
AddAttr<float>("fg_thresh", "fg_thresh");
458-
AddAttr<float>("bg_thresh_hi", "bg_thresh_hi");
459-
AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
460-
AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
461-
AddAttr<int>("class_nums", "class_nums");
462-
AddAttr<bool>("use_random", "use_random").SetDefault(true);
442+
AddInput(
443+
"RpnRois",
444+
"(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
445+
"N is the number of the GenerateProposalOp's output, "
446+
"each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
447+
AddInput("GtClasses",
448+
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
449+
"M is the number of groundtruth, "
450+
"each element is a class label of groundtruth.");
451+
AddInput(
452+
"IsCrowd",
453+
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
454+
"M is the number of groundtruth, "
455+
"each element is a flag indicating whether a groundtruth is crowd.");
456+
AddInput(
457+
"GtBoxes",
458+
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
459+
"M is the number of groundtruth, "
460+
"each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
461+
AddInput("ImInfo",
462+
"(Tensor), This input is a 2D Tensor with shape [B, 3]. "
463+
"B is the number of input images, "
464+
"each element consists of im_height, im_width, im_scale.");
465+
466+
AddOutput(
467+
"Rois",
468+
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
469+
"P usually equals batch_size_per_im * batch_size, "
470+
"each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
471+
AddOutput("LabelsInt32",
472+
"(LoDTensor), This output is a 2D LoDTensor with shape [P], "
473+
"each element represents a class label of a roi");
474+
AddOutput("BboxTargets",
475+
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
476+
"class_nums], "
477+
"each element represents a box label of a roi");
478+
AddOutput(
479+
"BboxInsideWeights",
480+
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
481+
"class_nums], "
482+
"each element indicates whether a box should contribute to loss.");
483+
AddOutput(
484+
"BboxOutsideWeights",
485+
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
486+
"class_nums], "
487+
"each element indicates whether a box should contribute to loss.");
488+
489+
AddAttr<int>("batch_size_per_im", "Batch size of rois per image.");
490+
AddAttr<float>("fg_fraction",
491+
"Foreground fraction in total batch_size_per_im.");
492+
AddAttr<float>(
493+
"fg_thresh",
494+
"Overlap threshold which is used to choose foreground samples.");
495+
AddAttr<float>("bg_thresh_hi",
496+
"Overlap threshold upper bound which is used to choose "
497+
"background sample.");
498+
AddAttr<float>("bg_thresh_lo",
499+
"Overlap threshold lower bound which is used to choose "
500+
"background sample.");
501+
AddAttr<std::vector<float>>("bbox_reg_weights", "Box regression weights.");
502+
AddAttr<int>("class_nums", "Class number.");
503+
AddAttr<bool>(
504+
"use_random",
505+
"Use random sampling to choose foreground and background boxes.")
506+
.SetDefault(true);
463507

464508
AddComment(R"DOC(
465-
Generate Proposals Labels Operator.
466-
)DOC");
509+
Given the bounding boxes output by GenerateProposalOp and the groundtruth boxes,
510+
this operator samples foreground boxes and background boxes, and computes the loss targets.
511+
512+
RpnRois are the output boxes of RPN after being processed by generate_proposal_op. These boxes
513+
are combined with the groundtruth boxes and sampled according to batch_size_per_im and fg_fraction.
514+
If an instance has an overlap with a groundtruth box greater than fg_thresh, it is considered a foreground sample.
515+
If an instance has an overlap with a groundtruth box greater than bg_thresh_lo and lower than bg_thresh_hi,
516+
it is considered a background sample.
517+
After all foreground and background boxes are chosen (so called Rois),
518+
then we apply random sampling to make sure
519+
the number of foreground boxes is no more than batch_size_per_im * fg_fraction.
520+
521+
For each box in Rois, we assign the classification (class label) and regression targets (box label) to it.
522+
Finally, BboxInsideWeights and BboxOutsideWeights are used to specify whether each box contributes to the training loss.
523+
)DOC");
467524
}
468525
};
469526

python/paddle/fluid/layers/detection.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,36 @@ def generate_proposal_labels(rpn_rois,
14241424
use_random=True):
14251425
"""
14261426
** Generate proposal labels Faster-RCNN **
1427-
TODO(buxingyuan): Add Document
1427+
Given the bounding boxes output by GenerateProposalOp and the groundtruth boxes,
1428+
this operator samples foreground boxes and background boxes, and computes the loss targets.
1429+
1430+
RpnRois are the output boxes of RPN after being processed by generate_proposal_op. These boxes
1431+
are combined with the groundtruth boxes and sampled according to batch_size_per_im and fg_fraction.
1432+
If an instance has an overlap with a groundtruth box greater than fg_thresh, it is considered a foreground sample.
1433+
If an instance has an overlap with a groundtruth box greater than bg_thresh_lo and lower than bg_thresh_hi,
1434+
it is considered a background sample.
1435+
After all foreground and background boxes are chosen (so called Rois),
1436+
then we apply random sampling to make sure
1437+
the number of foreground boxes is no more than batch_size_per_im * fg_fraction.
1438+
1439+
For each box in Rois, we assign the classification (class label) and regression targets (box label) to it.
1440+
Finally, BboxInsideWeights and BboxOutsideWeights are used to specify whether each box contributes to the training loss.
1441+
1442+
Args:
1443+
rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
1444+
gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth.
1445+
is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicating whether a groundtruth is crowd.
1446+
gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
1447+
im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale.
1448+
1449+
batch_size_per_im(int): Batch size of rois per image.
1450+
fg_fraction(float): Foreground fraction in total batch_size_per_im.
1451+
fg_thresh(float): Overlap threshold which is used to choose foreground samples.
1452+
bg_thresh_hi(float): Overlap threshold upper bound which is used to choose background samples.
1453+
bg_thresh_lo(float): Overlap threshold lower bound which is used to choose background samples.
1454+
bbox_reg_weights(list|tuple): Box regression weights.
1455+
class_nums(int): Class number.
1456+
use_random(bool): Use random sampling to choose foreground and background boxes.
14281457
"""
14291458

14301459
helper = LayerHelper('generate_proposal_labels', **locals())
@@ -1487,7 +1516,7 @@ def generate_proposals(scores,
14871516
eta=1.0,
14881517
name=None):
14891518
"""
1490-
** Generate proposal labels Faster-RCNN **
1519+
** Generate proposal Faster-RCNN **
14911520
14921521
This operation proposes RoIs according to each box with their probability to be a foreground object and
14931522
the box can be calculated by anchors. Bbox_deltais and scores to be an object are the output of RPN. Final proposals

0 commit comments

Comments
 (0)