@@ -104,7 +104,46 @@ class MaskRCNN(FasterRCNN):
 
     Example::
 
-        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import MaskRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>>
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # MaskRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define which feature maps we will
+        >>> # use to perform the region of interest cropping, as well as
+        >>> # the size of the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                      output_size=14,
+        >>>                                                      sampling_ratio=2)
+        >>> # put the pieces together inside a MaskRCNN model
+        >>> model = MaskRCNN(backbone,
+        >>>                  num_classes=2,
+        >>>                  rpn_anchor_generator=anchor_generator,
+        >>>                  box_roi_pool=roi_pooler,
+        >>>                  mask_roi_pool=mask_roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
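For context only (this note and sketch are not part of the commit): in eval mode, a Mask R-CNN model returns one dict per input image with "boxes", "labels", "scores" and "masks" entries. A minimal sketch of inspecting those outputs, using the pretrained checkpoint that the removed docstring line loaded:

    import torch
    import torchvision

    # Load the pretrained model referenced by the removed docstring line and run
    # two random inputs through it in inference mode, as in the example above.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    with torch.no_grad():
        predictions = model(x)

    # One dict per image: boxes are (N, 4) in (x1, y1, x2, y2) format,
    # labels and scores are (N,), masks are (N, 1, H, W) soft masks in [0, 1].
    for pred in predictions:
        print(pred["boxes"].shape, pred["labels"].shape,
              pred["scores"].shape, pred["masks"].shape)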
@@ -149,8 +188,10 @@ def __init__(self, backbone, num_classes=None,
             mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)
 
         if mask_predictor is None:
-            mask_dim_reduced = 256  # == mask_layers[-1]
-            mask_predictor = MaskRCNNPredictor(out_channels, mask_dim_reduced, num_classes)
+            mask_predictor_in_channels = 256  # == mask_layers[-1]
+            mask_dim_reduced = 256
+            mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
+                                               mask_dim_reduced, num_classes)
 
         super(MaskRCNN, self).__init__(
             backbone, num_classes,
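A note on the second hunk (an explanation added here, not text from the commit): the mask head always ends in mask_layers[-1] == 256 channels, so the mask predictor's input channels must be 256 rather than the backbone's out_channels; the two values only happen to coincide for the FPN backbones (256 channels), which is why the old code worked for maskrcnn_resnet50_fpn but not for a custom backbone such as the 1280-channel mobilenet_v2 features in the new docstring example. A minimal sketch of the channel flow, under those assumptions:

    import torch
    from torchvision.models.detection.mask_rcnn import MaskRCNNHeads, MaskRCNNPredictor

    backbone_out_channels = 1280                      # e.g. mobilenet_v2 features
    mask_layers, mask_dilation = (256, 256, 256, 256), 1

    # The mask head reduces the backbone channels to mask_layers[-1] == 256 ...
    mask_head = MaskRCNNHeads(backbone_out_channels, mask_layers, mask_dilation)
    # ... so the predictor takes 256 input channels, not backbone_out_channels.
    mask_predictor = MaskRCNNPredictor(256, 256, num_classes=2)

    pooled = torch.rand(4, backbone_out_channels, 14, 14)  # RoI-aligned mask features
    features = mask_head(pooled)         # -> (4, 256, 14, 14)
    logits = mask_predictor(features)    # -> (4, 2, 28, 28), one mask logit map per class
    print(features.shape, logits.shape)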