add a demo of video inference (#200)

CoinCheung · web-flow · commit 03306c553d91 · 2021-11-13T12:01:54.000+08:00
diff --git a/README.md b/README.md
@@ -58,7 +58,13 @@ With a pretrained weight, you can run inference on an single image like this:
 $ python tools/demo.py --config configs/bisenetv2_city.py --weight-path /path/to/your/weights.pth --img-path ./example.png
 ```
 
-This would run inference on the image and save the result image to `./res.jpg`.
+This would run inference on the image and save the result image to `./res.jpg`.  
+
+Or you can run inference on a video like this:  
+```
+$ python tools/demo_video.py --config configs/bisenetv2_coco.py --weight-path res/model_final.pth --input ./video.mp4 --output res.mp4
+```
+This would generate the segmentation result video `res.mp4`. If you want to read from a camera, you can set `--input camera_id` rather than `--input ./video.mp4`.   
 
 
 ## prepare dataset
diff --git a/openvino/README.md b/openvino/README.md
@@ -57,6 +57,10 @@ If you want to use gpu, you also need to install some dependencies inside the co
 
 I got the above commands from the official docs but I did not test it since my cpu does not have integrated gpu.  
 
+You can check whether your platform has an intel gpu with this command:  
+```
+$ sudo lspci | grep -i vga
+```
 
 4.configure environment  
 just run this script, and the environment would be ready:  
@@ -87,6 +91,6 @@ After this, you will see a segmentation result image named `res.jpg` generated.
 
 ### Tipes
 
-1.GPU support: openvino supports intel cpu and intel "gpu inside cpu". Until now(2021.11), other popular isolated gpus are not supported, such as nvidia/amd gpus. Also, other integrated gpus are not supported, such as aspeed graphics family.
+1. GPU support: openvino supports intel cpus and intel "gpu inside cpu". As of 2021.11, popular discrete gpus such as nvidia/amd gpus are not supported. Other integrated gpus, such as the aspeed graphics family, are not supported either.
 
-2.About low-precision: precision is optimized automatically, and the model will be run in one or several precision mode. We can also manually enforce to use bf16, as long as our cpu have `avx512_bf16` supports. If cpu does not support bf16, it will use simulation which would slow down the inference. If neither native bf16 nor simulation is supported, an error would occur.
+2. About low-precision: precision is optimized automatically, and the model will be run in one or several precision modes. We can also manually enforce the use of bf16, as long as our cpu has `avx512_bf16` support. If the cpu does not support bf16, it will use simulation, which slows down the inference. If neither native bf16 nor simulation is supported, an error will occur.
diff --git a/tools/demo_video.py b/tools/demo_video.py
@@ -0,0 +1,136 @@
+
+import sys
+sys.path.insert(0, '.')
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from PIL import Image
+import numpy as np
+import cv2
+from torch.multiprocessing import Process, Queue
+
+import lib.transform_cv2 as T
+from lib.models import model_factory
+from configs import set_cfg_from_file
+
+
+torch.set_grad_enabled(False)  # disable autograd globally, this script only runs inference
+
+
+# args: parsed at import time; `args` and `cfg` are read as globals by the code below
+parse = argparse.ArgumentParser()
+parse.add_argument('--config', default='configs/bisenetv2.py')
+parse.add_argument('--weight-path', default='./res/model_final.pth')
+parse.add_argument('--input', default='./example.mp4')
+parse.add_argument('--output', default='./res.mp4')
+args = parse.parse_args()
+cfg = set_cfg_from_file(args.config)
+
+
+
+# define model
+def get_model():
+    """Build `cfg.model_type`, load `args.weight_path` non-strictly, return it in eval mode on gpu."""
+    model = model_factory[cfg.model_type](cfg.n_cats, aux_mode='eval')
+    weights = torch.load(args.weight_path, map_location='cpu')  # deserialize on cpu first
+    model.load_state_dict(weights, strict=False)
+    return model.eval().cuda()
+
+
+# fetch frames
+def get_func(inpth, in_q):
+    """Read frames from `inpth` and put them on `in_q` as normalized tensors.
+
+    inpth: video file path, or a camera index written as a digit string such as '0'.
+    in_q: queue that receives one tensor per frame, followed by the string 'quit'.
+    """
+    import time
+    # cv2 selects a camera by int index only, a str is interpreted as a file name or stream url
+    cap = cv2.VideoCapture(int(inpth) if str(inpth).isdigit() else inpth)
+    to_tensor = T.ToTensor(
+        mean=(0.3257, 0.3690, 0.3223), # city, rgb
+        std=(0.2112, 0.2148, 0.2115),
+    )
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret: break
+        frame = to_tensor(dict(im=frame, lb=None))['im'].unsqueeze(0)
+        in_q.put(frame.flip(dims=(1,))) # rgb
+    cap.release()
+    in_q.put('quit')
+    while not in_q.empty(): time.sleep(0.01) # wait until the queue is drained, without spinning a core
+    print('input queue done')
+
+
+# save to video
+def save_func(inpth, outpth, out_q):
+    """Colorize the label maps taken from `out_q` and write them to `outpth` as an mp4 video.
+
+    inpth: source video path or camera index as a digit string; only probed for frame size and fps.
+    out_q: queue of batched label tensors, terminated by the string 'quit'.
+    """
+    np.random.seed(123)  # fixed seed, so every run draws the same color per class id
+    palette = np.random.randint(0, 256, (256, 3), dtype=np.uint8)
+    src = int(inpth) if str(inpth).isdigit() else inpth  # cv2 needs an int to select a camera
+    cap = cv2.VideoCapture(src)
+    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    cap.release()
+    video_writer = cv2.VideoWriter(outpth, cv2.VideoWriter_fourcc(*"mp4v"), fps, size)
+    while True:
+        out = out_q.get()
+        # check the type first instead of relying on how a tensor handles `==` with a str
+        if isinstance(out, str) and out == 'quit': break
+        preds = palette[out.numpy()]  # look up a color for every predicted class id
+        for pred in preds:
+            video_writer.write(pred)
+    video_writer.release()
+    print('output queue done')
+
+
+# inference a list of frames
+def infer_batch(frames):
+    """Run the global `net` on `frames` as one batch and put the label maps on the global `out_q`."""
+    batch = torch.cat(frames, dim=0).cuda()
+    height, width = batch.size()[2:]
+    # the net is fed a fixed 768x768 input (must be divisible by 32), its output is resized back
+    batch = F.interpolate(batch, size=(768, 768), mode='bilinear', align_corners=False)
+    scores = F.interpolate(net(batch)[0], size=(height, width), mode='bilinear', align_corners=False)
+    labels = scores.argmax(dim=1).detach().cpu()
+    out_q.put(labels)
+
+
+
+if __name__ == '__main__':
+    torch.multiprocessing.set_start_method('spawn')
+
+    # bounded queues: decoded frames come in, predicted label maps go out
+    in_q = Queue(1024)
+    out_q = Queue(1024)
+
+    # one process decodes the input frames, another one writes the result video
+    in_worker = Process(target=get_func, args=(args.input, in_q))
+    out_worker = Process(target=save_func, args=(args.input, args.output, out_q))
+    in_worker.start()
+    out_worker.start()
+
+    # `net` and `out_q` must stay module-level names, infer_batch reads them as globals
+    net = get_model()
+
+    # group frames into batches of 8; the string 'quit' marks the end of the input
+    frames = []
+    while True:
+        frame = in_q.get()
+        # check the type first instead of relying on how a tensor handles `==` with a str
+        if isinstance(frame, str) and frame == 'quit': break
+        frames.append(frame)
+        if len(frames) == 8:
+            infer_batch(frames)
+            frames = []
+    if frames:  # flush the last, partially filled batch
+        infer_batch(frames)
+
+    out_q.put('quit')  # tell the writer that no more results will come
+    out_worker.join()
+    in_worker.join()
diff --git a/video.mp4 b/video.mp4