Commit 19a3139

Merge branch 'openvinotoolkit:master' into master
2 parents: 715f645 + f7cd466


209 files changed (+3360 / -3038 lines)

data/dataset_definitions.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -159,7 +159,7 @@ datasets:
     annotation_conversion:
       converter: mscoco_mask_rcnn
       annotation_file: instances_val2017_persons.json
-      has_background: True
+      has_background: False
     sort_annotations: True
     annotation: mscoco_mask_rcnn_person.pickle
     dataset_meta: mscoco_mask_rcnn_person.json
```
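
For context, the converter's `has_background` option controls whether a `__background__` class is prepended to the label map; switching it to `False` changes the class indexing of the converted person annotations.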

demos/3d_segmentation_demo/python/3d_segmentation_demo.py

Lines changed: 0 additions & 7 deletions
```diff
@@ -245,7 +245,6 @@ def read_image(test_data_path, data_name, sizes=(128, 128, 128), is_series=True,
 def main():
     args = parse_arguments()
 
-    # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
     log.info('OpenVINO Inference Engine')
     log.info('\tbuild: {}'.format(get_version()))
     core = Core()
@@ -262,7 +261,6 @@ def main():
         raise AttributeError("Device {} do not support of 3D convolution. "
                              "Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*")
 
-    # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
     log.info('Reading model {}'.format(args.path_to_model))
     model = core.read_model(args.path_to_model)
 
@@ -279,12 +277,10 @@ def main():
 
     n, c, d, h, w = model.inputs[0].shape
 
-    # ------------------------------------ 3. Loading model to the plugin -------------------------------------
     compiled_model = core.compile_model(model, args.target_device)
     infer_request = compiled_model.create_infer_request()
     log.info('The model {} is loaded to {}'.format(args.path_to_model, args.target_device))
 
-    # --------------------------------------- 4. Preparing input data -----------------------------------------
     start_time = perf_counter()
     if not os.path.exists(args.path_to_input_data):
         raise AttributeError("Path to input data: '{}' does not exist".format(args.path_to_input_data))
@@ -312,9 +308,7 @@ def main():
     original_data = data_crop
     original_size = original_data.shape[-3:]
 
-    # ---------------------------------------------- 5. Do inference --------------------------------------------
     result = infer_request.infer({input_tensor_name: data_crop})
-    # ---------------------------- 6. Processing of the received inference results ------------------------------
     result = next(iter(result.values()))
     batch, channels, out_d, out_h, out_w = result.shape
 
@@ -375,7 +369,6 @@ def main():
     total_latency = (perf_counter() - start_time) * 1e3
     log.info("Metrics report:")
     log.info("\tLatency: {:.1f} ms".format(total_latency))
-    # --------------------------------------------- 7. Save output -----------------------------------------------
     tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
     Image.new('RGB', (original_data.shape[3], original_data.shape[2])).save(tiff_output_name,
                                                                             append_images=list_img, save_all=True)
```
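
The deleted banner comments numbered the steps of the standard OpenVINO runtime flow, which is unchanged across the hunks above. For reference, a condensed sketch of that flow — the import path assumes the 2022.1 `openvino.runtime` Python API, and the model path and device name are placeholders:

```python
from openvino.runtime import Core

core = Core()                                          # load the runtime ("plugin") layer
model = core.read_model('model.xml')                   # read the IR (.bin is found next to it)
compiled_model = core.compile_model(model, 'CPU')      # load the model to the target device
infer_request = compiled_model.create_infer_request()
# result = infer_request.infer({input_tensor_name: data})   # run inference on prepared data
```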

demos/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
```diff
@@ -134,7 +134,7 @@ macro(add_demo)
         target_include_directories(${OMZ_DEMO_NAME} PRIVATE ${OMZ_DEMO_INCLUDE_DIRECTORIES})
     endif()
 
-    target_link_libraries(${OMZ_DEMO_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
+    target_link_libraries(${OMZ_DEMO_NAME} PRIVATE ${OpenCV_LIBRARIES} openvino::runtime ${InferenceEngine_LIBRARIES}
         ${OMZ_DEMO_DEPENDENCIES} ngraph::ngraph utils gflags)
 
     if(UNIX)
@@ -143,6 +143,8 @@ macro(add_demo)
 endmacro()
 
 find_package(OpenCV REQUIRED COMPONENTS core highgui videoio imgproc imgcodecs gapi)
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+# TODO: remove InferenceEngine and ngraph after 2022.1
 find_package(InferenceEngine REQUIRED)
 find_package(ngraph REQUIRED)
```
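
The build is deliberately transitional here: demos now link the new `openvino::runtime` target while the legacy `${InferenceEngine_LIBRARIES}` and `ngraph::ngraph` dependencies stay in place, and the TODO marks the legacy `find_package` calls for removal after the 2022.1 release.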

demos/background_subtraction_demo/python/README.md

Lines changed: 28 additions & 2 deletions
```diff
@@ -8,7 +8,7 @@ This demo shows how to perform background subtraction using OpenVINO.
 
 ## How It Works
 
-The demo application expects an instance segmentation model in the Intermediate Representation (IR) format with the following constraints:
+The demo application expects an instance segmentation or background matting model in the Intermediate Representation (IR) format with the following constraints:
 1. for instance segmentation models based on `Mask RCNN` approach:
     * One input: `image` for input image.
     * At least three outputs including:
@@ -22,12 +22,29 @@ The demo application expects an instance segmentation model in the Intermediate
         * `conf` with confidence scores for each class for all boxes
         * `mask` with fixed-size mask channels for all boxes.
         * `proto` with fixed-size segmentation heat maps prototypes for all boxes.
+3. for image background matting models:
+    * Two inputs:
+        * `src` for input image
+        * `bgr` for input real background
+    * At least two outputs including:
+        * `fgr` with normalized in [0, 1] range foreground
+        * `pha` with normalized in [0, 1] range alpha
+4. for video background matting models based on RNN architecture:
+    * Five inputs:
+        * `src` for input image
+        * recurrent inputs: `r1`, `r2`, `r3`, `r4`
+    * At least six outputs including:
+        * `fgr` with normalized in [0, 1] range foreground
+        * `pha` with normalized in [0, 1] range alpha
+        * recurrent outputs: `rr1`, `rr2`, `rr3`, `rr4`
 
 The use case for the demo is an online conference where is needed to show only foreground - people and, respectively, to hide or replace background.
 Based on this an instance segmentation model must be trained at least for person class.
 
 As input, the demo application accepts a path to a single image file, a video file or a numeric ID of a web camera specified with a command-line argument `-i`
 
+> **NOTE**: if you use image background matting models, `--background` argument should be specified. This is a background image that equal to a real background behind a person on an input frame and must have the same shape as an input image.
+
 The demo workflow is the following:
 
 1. The demo application reads image/video frames one by one, resizes them to fit into the input image blob of the network (`image`).
@@ -60,6 +77,8 @@ omz_converter --list models.lst
 
 * instance-segmentation-person-????
 * yolact-resnet50-fpn-pytorch
+* background-matting-mobilenetv2
+* robust-video-matting
 
 > **NOTE**: Refer to the tables [Intel's Pre-Trained Models Device Support](../../../models/intel/device_support.md) and [Public Pre-Trained Models Device Support](../../../models/public/device_support.md) for the details on models inference support at different devices.
 
@@ -74,6 +93,7 @@ usage: background_subtraction_demo.py [-h] -m MODEL
                                       [--resize_type {crop,standard,fit_to_window,fit_to_window_letterbox}]
                                       [--labels LABELS]
                                       [--target_bgr TARGET_BGR]
+                                      [--background BACKGROUND]
                                       [--blur_bgr BLUR_BGR]
                                       [-nireq NUM_INFER_REQUESTS]
                                       [-nstreams NUM_STREAMS]
@@ -87,7 +107,8 @@ Options:
   -h, --help            Show this help message and exit.
   -m MODEL, --model MODEL
                         Required. Path to an .xml file with a trained model or
-                        address of model inference service if using OVMS adapter.
+                        address of model inference service if using OVMS
+                        adapter.
   --adapter {openvino,ovms}
                         Optional. Specify the model adapter. Default is
                         openvino.
@@ -110,6 +131,11 @@ Options:
   --target_bgr TARGET_BGR
                         Optional. Background onto which to composite the
                         output (by default to green field).
+  --background BACKGROUND
+                        Optional. Background image for background-matting
+                        model. This is a background image that equal to a real
+                        background behind a person on an input frame and must
+                        have the same shape as an input image.
   --blur_bgr BLUR_BGR   Optional. Background blur strength (by default with
                         value 0 is not applied).
 
```
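
To make the NOTE concrete: the image passed via `--background` must match the input frame in shape. A minimal sanity check, with hypothetical file names:

```python
import cv2

frame = cv2.imread('frame.jpg')      # hypothetical input frame
background = cv2.imread('room.jpg')  # hypothetical photo of the real background
# The matting model needs the background reference to align pixel-for-pixel with the frame
assert background.shape == frame.shape, 'background must have the same shape as the input frame'
```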

demos/background_subtraction_demo/python/background_subtraction_demo.py

Lines changed: 59 additions & 16 deletions
```diff
@@ -26,7 +26,8 @@
 
 sys.path.append(str(Path(__file__).resolve().parents[2] / 'common/python'))
 
-from openvino.model_zoo.model_api.models import MaskRCNNModel, OutputTransform, RESIZE_TYPES, YolactModel
+from openvino.model_zoo.model_api.models import MaskRCNNModel, OutputTransform, RESIZE_TYPES, YolactModel, ImageMattingWithBackground, VideoBackgroundMatting
+from openvino.model_zoo.model_api.models.utils import load_labels
 from openvino.model_zoo.model_api.performance_metrics import PerformanceMetrics
 from openvino.model_zoo.model_api.pipelines import get_user_config, AsyncPipeline
 from openvino.model_zoo.model_api.adapters import create_core, OpenvinoAdapter, OVMSAdapter
@@ -44,7 +45,7 @@ def build_argparser():
     args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
     args.add_argument('-m', '--model', required=True,
                       help='Required. Path to an .xml file with a trained model '
-                           'or address of model inference service if using ovms adapter.')
+                           'or address of model inference service if using OVMS adapter.')
     args.add_argument('--adapter', help='Optional. Specify the model adapter. Default is openvino.',
                       default='openvino', type=str, choices=('openvino', 'ovms'))
     args.add_argument('-i', '--input', required=True,
@@ -61,6 +62,10 @@ def build_argparser():
     args.add_argument('--labels', help='Optional. Labels mapping file.', default=None, type=str)
     args.add_argument('--target_bgr', default=None, type=str,
                       help='Optional. Background onto which to composite the output (by default to green field).')
+    args.add_argument('--background', default=None, type=str,
+                      help='Optional. Background image for background-matting model. This is a background image '
+                           'that equal to a real background behind a person on an input frame and must have the '
+                           'same shape as an input image.')
     args.add_argument('--blur_bgr', default=0, type=int,
                       help='Optional. Background blur strength (by default with value 0 is not applied).')
 
@@ -99,13 +104,31 @@ def build_argparser():
     return parser
 
 
-def get_model(model_adapter, configuration):
+def get_model(model_adapter, configuration, args):
     inputs = model_adapter.get_input_layers()
     outputs = model_adapter.get_output_layers()
+    need_bgr_input = False
+    is_matting_model = False
     if len(inputs) == 1 and len(outputs) == 4 and 'proto' in outputs.keys():
-        return YolactModel(model_adapter, configuration)
+        model = YolactModel(model_adapter, configuration)
+    elif len(inputs) == 5 and len(outputs) == 6 and 'pha' in outputs.keys():
+        model = VideoBackgroundMatting(model_adapter, configuration)
+        is_matting_model = True
+    elif len(inputs) == 2 and len(outputs) in (2, 3) and 'bgr' in inputs.keys():
+        if args.background is None:
+            raise ValueError('The ImageMattingWithBackground model expects the specified "--background" option.')
+        model = ImageMattingWithBackground(model_adapter, configuration)
+        need_bgr_input = True
+        is_matting_model = True
     else:
-        return MaskRCNNModel(model_adapter, configuration)
+        model = MaskRCNNModel(model_adapter, configuration)
+    if not need_bgr_input and args.background is not None:
+        log.warning('The \"--background\" option works only for ImageMattingWithBackground model. Option will be omitted.')
+
+    if args.raw_output_message and is_matting_model:
+        log.warning('\'--raw_output_message\' argument is set but is used background-matting based model, nothing to show')
+        args.raw_output_message = False
+    return model, need_bgr_input
 
 
 def print_raw_results(outputs, frame_id):
@@ -132,14 +155,13 @@ def fit_to_window(input_img, output_resolution):
     return output
 
 
-def render_results(frame, objects, output_resolution, target_bgr, person_id, blur_kernel=0, show_with_original_frame=False):
-    blur_kernel = tuple([blur_kernel] * 2) if blur_kernel else blur_kernel
-    if target_bgr is None:
-        target_bgr = cv2.blur(frame, blur_kernel) if blur_kernel else np.full(frame.shape, [155, 255, 120], dtype=np.uint8)
-    else:
-        target_bgr = cv2.resize(target_bgr, (frame.shape[1], frame.shape[0]))
-        if blur_kernel:
-            target_bgr = cv2.blur(target_bgr, blur_kernel)
+def process_matting(objects, target_bgr):
+    fgr, pha = objects
+    output = fgr * pha + target_bgr * (1 - pha)
+    return (output * 255).astype(np.uint8)
+
+
+def process_masks(objects, frame, target_bgr, person_id):
     classes, masks = objects[1], objects[3]
     # Choose masks only for person class
     valid_inds = classes == person_id
@@ -154,6 +176,23 @@ def render_results(frame, objects, output_resolution, target_bgr, person_id, blu
     composed_mask = cv2.medianBlur(composed_mask.astype(np.uint8), 11)
     composed_mask = np.repeat(np.expand_dims(composed_mask, axis=-1), 3, axis=2)
     output = np.where(composed_mask == 1, frame, target_bgr)
+    return output
+
+
+def render_results(frame, objects, output_resolution, target_bgr, person_id, blur_kernel=0, show_with_original_frame=False):
+    blur_kernel = tuple([blur_kernel] * 2) if blur_kernel else blur_kernel
+    if target_bgr is None:
+        target_bgr = cv2.blur(frame, blur_kernel) if blur_kernel else np.full(frame.shape, [155, 255, 120], dtype=np.uint8)
+    else:
+        target_bgr = cv2.resize(target_bgr, (frame.shape[1], frame.shape[0]))
+        if blur_kernel:
+            target_bgr = cv2.blur(target_bgr, blur_kernel)
+
+    if len(objects) == 4:
+        output = process_masks(objects, frame, target_bgr, person_id)
+    else:
+        output = process_matting(objects, target_bgr.astype(np.float32) / 255)
+
     if show_with_original_frame:
         output = cv2.hconcat([frame, output])
     h, w = output.shape[:2]
@@ -176,14 +215,17 @@ def main():
     elif args.adapter == 'ovms':
         model_adapter = OVMSAdapter(args.model)
 
-    labels = ['__background__', 'person'] if args.labels is None else args.labels
+    labels = ['__background__', 'person'] if args.labels is None else load_labels(args.labels)
+    assert len(labels), 'The file with class labels is empty'
 
     configuration = {
        'confidence_threshold': args.prob_threshold,
        'resize_type': args.resize_type
     }
 
-    model = get_model(model_adapter, configuration)
+    model, need_bgr_input = get_model(model_adapter, configuration, args)
+
+    input_bgr = open_images_capture(args.background, False).read() if need_bgr_input else None
 
     person_id = -1
     for i, label in enumerate(labels):
@@ -226,7 +268,8 @@ def main():
                                          cap.fps(), tuple(output_resolution)):
             raise RuntimeError("Can't open video writer")
         # Submit for inference
-        pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
+        data = {'src': frame, 'bgr': input_bgr} if input_bgr is not None else frame
+        pipeline.submit_data(data, next_frame_id, {'frame': frame, 'start_time': start_time})
         next_frame_id += 1
     else:
         # Wait for empty request
```
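
The new `process_matting()` helper is plain alpha compositing of the model's `fgr`/`pha` outputs. The same arithmetic as a self-contained toy, with made-up shapes and values:

```python
import numpy as np

h, w = 4, 4
fgr = np.random.rand(h, w, 3).astype(np.float32)  # foreground prediction, in [0, 1]
pha = np.random.rand(h, w, 1).astype(np.float32)  # alpha matte, in [0, 1]
bgr = np.zeros((h, w, 3), dtype=np.float32)       # replacement background, in [0, 1]

# Blend foreground over background by alpha, exactly as process_matting() does
output = fgr * pha + bgr * (1 - pha)
print((output * 255).astype(np.uint8).shape)      # (4, 4, 3), ready for display
```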
demos/background_subtraction_demo/python/models.lst

Lines changed: 2 additions & 0 deletions

```diff
@@ -1,3 +1,5 @@
 # This file can be used with the --list option of the model downloader.
 instance-segmentation-person-????
 yolact-resnet50-fpn-pytorch
+# TODO: background-matting-mobilenetv2
+# TODO: robust-video-matting
```

demos/colorization_demo/python/colorization_demo.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -67,7 +67,7 @@ def main(args):
     core = Core()
 
     log.info('Reading model {}'.format(args.model))
-    model = core.read_model(args.model, args.model.with_suffix(".bin"))
+    model = core.read_model(args.model)
 
     input_tensor_name = 'data_l'
     input_shape = model.input(input_tensor_name).shape
```
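
This simplification works because `core.read_model()` called with only the `.xml` path picks up the weights file with the same base name and a `.bin` extension from the same directory, so building the path with `with_suffix('.bin')` was redundant.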

demos/common/cpp/monitors/include/monitors/presenter.h

Lines changed: 2 additions & 2 deletions
```diff
@@ -27,8 +27,8 @@ class Presenter {
               cv::Size graphSize = {150, 60},
               std::size_t historySize = 20);
     void addRemoveMonitor(MonitorType monitor);
-    void handleKey(int key); // handles c, d, m, h keys
-    void drawGraphs(cv::Mat& frame);
+    void handleKey(int key); // handles C, D, M, H keys
+    void drawGraphs(const cv::Mat& frame);
     std::vector<std::string> reportMeans() const;
 
     const int yPos;
```

demos/common/cpp/monitors/src/presenter.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -111,7 +111,7 @@ void Presenter::handleKey(int key) {
     }
 }
 
-void Presenter::drawGraphs(cv::Mat& frame) {
+void Presenter::drawGraphs(const cv::Mat& frame) {
     const std::chrono::steady_clock::time_point curTimeStamp = std::chrono::steady_clock::now();
     if (curTimeStamp - prevTimeStamp >= std::chrono::milliseconds{1000}) {
         prevTimeStamp = curTimeStamp;
```
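
A note on the `const cv::Mat&` change: `cv::Mat` has shallow const semantics, so a `const` reference still lets OpenCV drawing calls write into the shared pixel buffer; the qualifier only signals that `drawGraphs()` will not reseat or reallocate the caller's matrix.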

demos/common/cpp/utils/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
```diff
@@ -10,4 +10,4 @@ source_group("include" FILES ${HEADERS})
 
 add_library(utils STATIC ${HEADERS} ${SOURCES})
 target_include_directories(utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_link_libraries(utils PRIVATE gflags ${InferenceEngine_LIBRARIES} opencv_core opencv_imgcodecs opencv_videoio)
+target_link_libraries(utils PRIVATE gflags openvino::runtime ${InferenceEngine_LIBRARIES} opencv_core opencv_imgcodecs opencv_videoio)
```
