add a scriptable ResizeShortestEdge.get_output_shape

ppwwyyxx · facebook-github-bot · commit e9cbe9021210 · 2021-09-08T10:58:47.000-07:00
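Summary: Factor the output-shape computation of ResizeShortestEdge out into a static, scriptable `get_output_shape`; this function alone is useful in deployment.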

Reviewed By: zhanghang1989

Differential Revision: D30801733

fbshipit-source-id: 792f8ca016f2c6782fc25c9bbaa302588597d087
diff --git a/detectron2/data/transforms/augmentation_impl.py b/detectron2/data/transforms/augmentation_impl.py
@@ -6,6 +6,7 @@
 import numpy as np
 import sys
 from typing import Tuple
+import torch
 from fvcore.transforms.transform import (
     BlendTransform,
     CropTransform,
@@ -131,6 +132,7 @@ class ResizeShortestEdge(Augmentation):
     If `max_size` is reached, then downscale so that the longer edge does not exceed max_size.
     """
 
+    @torch.jit.unused
     def __init__(
         self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR
     ):
@@ -155,6 +157,7 @@ def __init__(
             )
         self._init(locals())
 
+    @torch.jit.unused
     def get_transform(self, image):
         h, w = image.shape[:2]
         if self.is_range:
@@ -164,18 +167,30 @@ def get_transform(self, image):
         if size == 0:
             return NoOpTransform()
 
-        scale = size * 1.0 / min(h, w)
+        newh, neww = ResizeShortestEdge.get_output_shape(h, w, size, self.max_size)
+        return ResizeTransform(h, w, newh, neww, self.interp)
+
+    @staticmethod
+    def get_output_shape(
+        oldh: int, oldw: int, short_edge_length: int, max_size: int
+    ) -> Tuple[int, int]:
+        """
+        Compute the output (h, w) given input size, target short edge length and max_size.
+        """
+        h, w = oldh, oldw
+        size = short_edge_length * 1.0
+        scale = size / min(h, w)
         if h < w:
             newh, neww = size, scale * w
         else:
             newh, neww = scale * h, size
-        if max(newh, neww) > self.max_size:
-            scale = self.max_size * 1.0 / max(newh, neww)
+        if max(newh, neww) > max_size:
+            scale = max_size * 1.0 / max(newh, neww)
             newh = newh * scale
             neww = neww * scale
         neww = int(neww + 0.5)
         newh = int(newh + 0.5)
-        return ResizeTransform(h, w, newh, neww, self.interp)
+        return (newh, neww)
 
 
 class ResizeScale(Augmentation):
@@ -393,7 +408,7 @@ def get_crop_size(self, image_size):
             cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1)
             return ch, cw
         else:
-            NotImplementedError("Unknown crop type {}".format(self.crop_type))
+            raise NotImplementedError("Unknown crop type {}".format(self.crop_type))
 
 
 class RandomCrop_CategoryAreaConstraint(Augmentation):
diff --git a/detectron2/export/torchscript.py b/detectron2/export/torchscript.py
@@ -71,6 +71,7 @@ def dump_torchscript_IR(model, dir):
         model (TracedModule/ScriptModule/ScriptFUnction): traced or scripted module
         dir (str): output directory to dump files.
     """
+    dir = os.path.expanduser(dir)
     PathManager.mkdirs(dir)
 
     def _get_script_mod(mod):
diff --git a/detectron2/modeling/mmdet_wrapper.py b/detectron2/modeling/mmdet_wrapper.py
@@ -79,7 +79,7 @@ def __init__(
         # "Neck" weights, if any, are part of neck itself. This is the interface
         # of mmdet so we follow it. Reference:
         # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py
-        logger.info(f"Initializing mmdet backbone weights...")
+        logger.info("Initializing mmdet backbone weights...")
         self.backbone.init_weights()
         # train() in mmdet modules is non-trivial, and has to be explicitly
         # called. Reference:
diff --git a/tests/data/test_transforms.py b/tests/data/test_transforms.py
@@ -5,7 +5,9 @@
 import numpy as np
 import unittest
 from unittest import mock
+import torch
 from PIL import Image, ImageOps
+from torch.nn import functional as F
 
 from detectron2.config import get_cfg
 from detectron2.data import detection_utils
@@ -225,7 +227,22 @@ def test_resize_transform(self):
             in_img = np.random.randint(0, 255, size=in_shape, dtype=np.uint8)
             tfm = T.ResizeTransform(in_shape[0], in_shape[1], out_shape[0], out_shape[1])
             out_img = tfm.apply_image(in_img)
-            self.assertTrue(out_img.shape == out_shape)
+            self.assertEqual(out_img.shape, out_shape)
+
+    def test_resize_shorted_edge_scriptable(self):
+        def f(image):
+            newh, neww = T.ResizeShortestEdge.get_output_shape(
+                image.shape[-2], image.shape[-1], 80, 133
+            )
+            return F.interpolate(image.unsqueeze(0), size=(newh, neww))
+
+        input = torch.randn(3, 10, 10)
+        script_f = torch.jit.script(f)
+        self.assertTrue(torch.allclose(f(input), script_f(input)))
+
+        # generalize to new shapes
+        input = torch.randn(3, 8, 100)
+        self.assertTrue(torch.allclose(f(input), script_f(input)))
 
     def test_extent_transform(self):
         input_shapes = [(100, 100), (100, 100, 1), (100, 100, 3)]