add benchmark for optical flow Raft model

WanliZhong · WanliZhong · commit 57d9a5036815 · 2023-12-28T19:14:38.000+08:00
diff --git a/README.md b/README.md
@@ -107,6 +107,10 @@ Some examples are listed below. You can find more in the directory of each model
 
 ![crnn_demo](./models/text_recognition_crnn/example_outputs/CRNNCTC.gif)
 
+### Optical Flow Estimation with [RAFT](./models/optical_flow_estimation_raft/)
+
+![raft_demo](./models/optical_flow_estimation_raft/example_outputs/result.jpg)
+
 ## License
 
 OpenCV Zoo is licensed under the [Apache 2.0 license](./LICENSE). Please refer to licenses of different models.
diff --git a/benchmark/config/optical_flow_estimation_raft.yaml b/benchmark/config/optical_flow_estimation_raft.yaml
@@ -0,0 +1,16 @@
+Benchmark:
+  name: "Optical Flow Estimation Benchmark"
+  type: "OpticalFlow"
+  data:
+    path: "data/optical_flow_estimation"
+    files: [["driving0.png", "driving1.png"], ["flyingThings3D0.png", "flyingThings3D1.png"], ["monkaa0.png", "monkaa1.png"]]
+    sizes: # [[w1, h1], ...], Omit to run at original scale
+      - [360, 480]
+  metric:
+    warmup: 30
+    repeat: 10
+  backend: "default"
+  target: "cpu"
+
+Model:
+  name: "Raft"
diff --git a/benchmark/download_data.py b/benchmark/download_data.py
@@ -217,6 +217,10 @@ def get_confirm_token(response):  # in case of large files
         url='https://drive.google.com/u/0/uc?id=1RbLyetgqFUTt0IHaVmu6c_b7KeXJgKbc&export=download',
         sha='fbae2fb0a47fe65e316bbd0ec57ba21461967550',
         filename='person_detection.zip'),
+    optical_flow_estimation=Downloader(name='optical_flow_estimation',
+        url='https://drive.google.com/u/0/uc?id=1_fvN7cgc-j92MeI_wHKGkhWxbXeML_gR&export=download',
+        sha='96b75eaef250efdde62184b07707827d76bd336c',
+        filename='optical_flow_estimation.zip'),
 )
 
 if __name__ == '__main__':
diff --git a/benchmark/utils/dataloaders/__init__.py b/benchmark/utils/dataloaders/__init__.py
@@ -2,5 +2,6 @@
 from .classification import ClassificationImageLoader
 from .recognition import RecognitionImageLoader
 from .tracking import TrackingVideoLoader
+from .optical_flow import OpticalFlowImageLoader
 
-__all__ = ['BaseImageLoader', 'BaseVideoLoader', 'ClassificationImageLoader', 'RecognitionImageLoader', 'TrackingVideoLoader']
+__all__ = ['BaseImageLoader', 'BaseVideoLoader', 'ClassificationImageLoader', 'RecognitionImageLoader', 'TrackingVideoLoader', 'OpticalFlowImageLoader']
diff --git a/benchmark/utils/dataloaders/optical_flow.py b/benchmark/utils/dataloaders/optical_flow.py
@@ -0,0 +1,24 @@
+import os
+
+import numpy as np
+import cv2 as cv
+
+from .base_dataloader import _BaseImageLoader
+from ..factory import DATALOADERS
+
+@DATALOADERS.register
+class OpticalFlowImageLoader(_BaseImageLoader):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def __iter__(self):
+        for case in self._files:
+            image0 = cv.imread(os.path.join(self._path, case[0]))
+            image1 = cv.imread(os.path.join(self._path, case[1]))
+            if [0, 0] in self._sizes:
+                yield "{}, {}".format(case[0], case[1]), image0, image1
+            else:
+                for size in self._sizes:
+                    image0_r = cv.resize(image0, size)
+                    image1_r = cv.resize(image1, size)
+                    yield "{}, {}".format(case[0], case[1]), image0_r, image1_r
diff --git a/benchmark/utils/metrics/__init__.py b/benchmark/utils/metrics/__init__.py
@@ -2,5 +2,6 @@
 from .detection import Detection
 from .recognition import Recognition
 from .tracking import Tracking
+from .optical_flow import OpticalFlow
 
-__all__ = ['Base', 'Detection', 'Recognition', 'Tracking']
+__all__ = ['Base', 'Detection', 'Recognition', 'Tracking', 'OpticalFlow']
diff --git a/benchmark/utils/metrics/optical_flow.py b/benchmark/utils/metrics/optical_flow.py
@@ -0,0 +1,22 @@
+import cv2 as cv
+
+from .base_metric import BaseMetric
+from ..factory import METRICS
+
+@METRICS.register
+class OpticalFlow(BaseMetric):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def forward(self, model, *args, **kwargs):
+        img0, img1 = args
+
+        self._timer.reset()
+        for _ in range(self._warmup):
+            model.infer(img0, img1)
+        for _ in range(self._repeat):
+            self._timer.start()
+            model.infer(img0, img1)
+            self._timer.stop()
+
+        return self._timer.getRecords()
diff --git a/models/__init__.py b/models/__init__.py
@@ -20,6 +20,7 @@
 from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog
 from .object_tracking_vittrack.vittrack import VitTrack
 from .text_detection_ppocr.ppocr_det import PPOCRDet
+from .optical_flow_estimation_raft.raft import Raft
 
 class ModuleRegistery:
     def __init__(self, name):
@@ -94,3 +95,4 @@ def register(self, item):
 MODELS.register(FacialExpressionRecog)
 MODELS.register(VitTrack)
 MODELS.register(PPOCRDet)
+MODELS.register(Raft)
diff --git a/models/optical_flow_estimation_raft/raft.py b/models/optical_flow_estimation_raft/raft.py
@@ -29,6 +29,12 @@ def _preprocess(self, image):
         img_input = img_input.astype(np.float32)
         return img_input
 
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
+        self.model.setPreferableBackend(self.backend_id)
+        self.model.setPreferableTarget(self.target_id)
+
     def infer(self, image1, image2):
 
         # Preprocess