
Commit 14b1bc9

fmassa authored and soumith committed
Add better docs for FasterRCNN, MaskRCNN and KeypointRCNN (#943)
1 parent 05bc255 commit 14b1bc9

File tree (3 files changed, +120 −5 lines)

torchvision/models/detection/faster_rcnn.py
torchvision/models/detection/keypoint_rcnn.py
torchvision/models/detection/mask_rcnn.py

torchvision/models/detection/faster_rcnn.py

Lines changed: 35 additions & 1 deletion

@@ -99,7 +99,41 @@ class FasterRCNN(GeneralizedRCNN):
 
     Example::
 
-        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import FasterRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # FasterRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define what are the feature maps that we will
+        >>> # use to perform the region of interest cropping, as well as
+        >>> # the size of the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> # put the pieces together inside a FasterRCNN model
+        >>> model = FasterRCNN(backbone,
+        >>>                    num_classes=2,
+        >>>                    rpn_anchor_generator=anchor_generator,
+        >>>                    box_roi_pool=roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
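
For readers who want to run the new FasterRCNN example outside of the docstring, here is a minimal sketch of the same steps as a plain script. It assumes a torchvision release contemporary with this commit, where MultiScaleRoIAlign accepts integer featmap_names such as [0] (later releases use string keys such as ['0']); in eval mode the model returns one dict per input image with 'boxes', 'labels' and 'scores' tensors.

    import torch
    import torchvision
    from torchvision.models.detection import FasterRCNN
    from torchvision.models.detection.rpn import AnchorGenerator

    # backbone that returns a single feature map; FasterRCNN reads out_channels from it
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280

    # 5 sizes x 3 aspect ratios per spatial location, one inner tuple per feature map
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # crop 7x7 features from feature map 0 for the box head
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    model.eval()

    # two images of different sizes; inference returns one dict per image
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)
    print(predictions[0].keys())  # dict_keys(['boxes', 'labels', 'scores'])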

torchvision/models/detection/keypoint_rcnn.py

Lines changed: 41 additions & 1 deletion

@@ -101,7 +101,47 @@ class KeypointRCNN(FasterRCNN):
 
     Example::
 
-        >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import KeypointRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>>
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # KeypointRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define what are the feature maps that we will
+        >>> # use to perform the region of interest cropping, as well as
+        >>> # the size of the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                          output_size=14,
+        >>>                                                          sampling_ratio=2)
+        >>> # put the pieces together inside a FasterRCNN model
+        >>> model = KeypointRCNN(backbone,
+        >>>                      num_classes=2,
+        >>>                      rpn_anchor_generator=anchor_generator,
+        >>>                      box_roi_pool=roi_pooler,
+        >>>                      keypoint_roi_pool=keypoint_roi_pooler)
+        >>> model.eval()
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
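
A usage note on the KeypointRCNN example above: in eval mode the prediction dicts carry keypoint outputs in addition to boxes, labels and scores. A minimal sketch of inspecting them, assuming the model assembled in the docstring example and torchvision's default of 17 keypoints per instance:

    import torch

    # 'model' is the KeypointRCNN assembled in the docstring example, in eval mode
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)

    det = predictions[0]                   # one dict per input image
    print(det['boxes'].shape)              # [N, 4] boxes as (x1, y1, x2, y2)
    print(det['scores'].shape)             # [N] detection confidences
    print(det['keypoints'].shape)          # [N, 17, 3] keypoints as (x, y, visibility)
    print(det['keypoints_scores'].shape)   # [N, 17] per-keypoint scores

The duplicated model.eval() call at the end of the example is harmless, since eval() only toggles the module's training flag.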

torchvision/models/detection/mask_rcnn.py

Lines changed: 44 additions & 3 deletions

@@ -104,7 +104,46 @@ class MaskRCNN(FasterRCNN):
 
     Example::
 
-        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import MaskRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>>
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # MaskRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define what are the feature maps that we will
+        >>> # use to perform the region of interest cropping, as well as
+        >>> # the size of the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                      output_size=14,
+        >>>                                                      sampling_ratio=2)
+        >>> # put the pieces together inside a FasterRCNN model
+        >>> model = MaskRCNN(backbone,
+        >>>                  num_classes=2,
+        >>>                  rpn_anchor_generator=anchor_generator,
+        >>>                  box_roi_pool=roi_pooler,
+        >>>                  mask_roi_pool=mask_roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)

@@ -149,8 +188,10 @@ def __init__(self, backbone, num_classes=None,
             mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)
 
         if mask_predictor is None:
-            mask_dim_reduced = 256  # == mask_layers[-1]
-            mask_predictor = MaskRCNNPredictor(out_channels, mask_dim_reduced, num_classes)
+            mask_predictor_in_channels = 256  # == mask_layers[-1]
+            mask_dim_reduced = 256
+            mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
+                                               mask_dim_reduced, num_classes)
 
         super(MaskRCNN, self).__init__(
             backbone, num_classes,
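
The second hunk in this file is a rename for clarity: the input channels of MaskRCNNPredictor come from the last layer of the mask head (mask_layers[-1], which happens to equal 256), not from the backbone's out_channels, and mask_predictor_in_channels now says so explicitly. A minimal sketch of the two modules in isolation, assuming the 1280 backbone channels and num_classes=2 from the docstring example, and the mask_layers/mask_dilation defaults that MaskRCNN uses when no mask_head is passed:

    import torch
    from torchvision.models.detection.mask_rcnn import MaskRCNNHeads, MaskRCNNPredictor

    out_channels = 1280                 # backbone feature channels (mobilenet_v2 example)
    mask_layers = (256, 256, 256, 256)  # default hidden layers of the mask head
    mask_dilation = 1

    mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)

    # the predictor consumes the mask head's output channels (mask_layers[-1]),
    # which is what mask_predictor_in_channels makes explicit
    mask_predictor = MaskRCNNPredictor(mask_layers[-1], 256, num_classes=2)

    roi_features = torch.rand(8, out_channels, 14, 14)  # pooled mask features for 8 RoIs
    mask_logits = mask_predictor(mask_head(roi_features))
    print(mask_logits.shape)  # torch.Size([8, 2, 28, 28]): per-class mask logits, upsampled 2x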
