import torch
import torch.nn.functional as F

import cv2
import numpy as np
from numba import jit
from numba.typed import List

from .bbox import *
1811
def detect(net, img, device):
    """Run the S3FD face detector on a single HWC image.

    Args:
        net: S3FD network (a callable torch module).
        img: numpy array of shape (H, W, 3); RGB channel order
            (batch_detect flips channels to BGR for the network).
        device: torch device string, e.g. ``'cuda'`` or ``'cpu'``.

    Returns:
        The result of ``batch_detect`` for a batch of one image:
        candidate ``[x1, y1, x2, y2, score]`` detections.
    """
    # HWC -> CHW, the layout the network expects.
    img = img.transpose(2, 0, 1)
    # Creates a batch of 1 (adds a leading axis without copying).
    img = np.expand_dims(img, 0)

    # Convert to float32 on the target device up front; feeding uint8
    # through the network previously hit an overflow bug downstream.
    img = torch.from_numpy(img).to(device, dtype=torch.float32)

    return batch_detect(net, img, device)
2720
def batch_detect(net, img_batch, device):
    """Run the S3FD face detector on a batch of images.

    Args:
        net: S3FD network (a callable torch module) returning a list of
            feature maps, alternating classification / regression heads.
        img_batch: tensor of shape (B, 3, H, W), RGB channel order;
            converted to float32 on `device` here.
        device: torch device string, e.g. ``'cuda'`` or ``'cpu'``.

    Returns:
        Per-image lists of raw ``[x1, y1, x2, y2, score]`` candidates
        from ``get_predictions`` (class score > 0.05).
    """
    if 'cuda' in device:
        # Fixed-size inputs benefit from cuDNN autotuning.
        torch.backends.cudnn.benchmark = True

    batch_size = img_batch.size(0)
    img_batch = img_batch.to(device, dtype=torch.float32)

    img_batch = img_batch.flip(-3)  # RGB to BGR
    # Subtract the per-channel mean the network was trained with.
    img_batch = img_batch - torch.tensor([104.0, 117.0, 123.0], device=device).view(1, 3, 1, 1)

    with torch.no_grad():
        olist = net(img_batch)  # patched uint8_t overflow error

    # Even indices hold class logits; turn them into probabilities.
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)

    # Move everything to numpy once, so the numba-jitted decoder can run.
    olist = [oelem.data.cpu().numpy() for oelem in olist]

    # numba.typed.List gives the jitted function a typed container
    # (plain Python lists are deprecated in nopython mode).
    return get_predictions(List(olist), batch_size)
53- for j in range (BB ):
49+ @jit (nopython = True )
50+ def get_predictions (olist , batch_size ):
51+ bboxlists = []
52+ variances = [0.1 , 0.2 ]
53+ for j in range (batch_size ):
5454 bboxlist = []
5555 for i in range (len (olist ) // 2 ):
5656 ocls , oreg = olist [i * 2 ], olist [i * 2 + 1 ]
57- FB , FC , FH , FW = ocls .size () # feature map size
5857 stride = 2 ** (i + 2 ) # 4,8,16,32,64,128
59- anchor = stride * 4
6058 poss = zip (* np .where (ocls [:, 1 , :, :] > 0.05 ))
6159 for Iindex , hindex , windex in poss :
6260 axc , ayc = stride / 2 + windex * stride , stride / 2 + hindex * stride
6361 score = ocls [j , 1 , hindex , windex ]
64- loc = oreg [j , :, hindex , windex ].contiguous ().view (1 , 4 )
65- priors = torch .Tensor ([[axc / 1.0 , ayc / 1.0 , stride * 4 / 1.0 , stride * 4 / 1.0 ]])
66- variances = [0.1 , 0.2 ]
62+ loc = oreg [j , :, hindex , windex ].copy ().reshape (1 , 4 )
63+ priors = np .array ([[axc / 1.0 , ayc / 1.0 , stride * 4 / 1.0 , stride * 4 / 1.0 ]])
6764 box = decode (loc , priors , variances )
68- x1 , y1 , x2 , y2 = box [0 ] * 1.0
65+ x1 , y1 , x2 , y2 = box [0 ]
6966 bboxlist .append ([x1 , y1 , x2 , y2 , score ])
7067
7168 bboxlists .append (bboxlist )
0 commit comments