Add OpenVINO backend

dkurt · dkurt · commit ffc25a9da424 · 2020-09-15T19:03:38.000+03:00
diff --git a/README.md b/README.md
@@ -77,6 +77,14 @@ $ source venv3/bin/activate
 (venv3) $ bonito download --all
 ```
 
+To optimize inference on CPU with Intel OpenVINO:
+* Download and install OpenVINO from https://software.seek.intel.com/openvino-toolkit
+```bash
+(venv3) $ pip install -r /opt/intel/openvino/deployment_tools/model_optimizer/requirements_onnx.txt
+(venv3) $ source /opt/intel/openvino/bin/setupvars.sh
+(venv3) $ bonito evaluate dna_r9.4.1 --use_openvino --device=cpu
+```
+
 ## Medaka
 
 The Medaka can be downloaded from [here](https://nanoporetech.box.com/shared/static/u5gncwjbtg2k3dkw26nmvdvck65ab3xh.hdf5).
diff --git a/bonito/evaluate.py b/bonito/evaluate.py
@@ -30,7 +30,7 @@ def main(args):
     for w in [int(i) for i in args.weights.split(',')]:
 
         print("* loading model", w)
-        model = load_model(args.model_directory, args.device, weights=w, half=args.half)
+        model = load_model(args.model_directory, args.device, weights=w, half=args.half, use_openvino=args.use_openvino)
 
         print("* calling")
         predictions = []
@@ -39,9 +39,8 @@ def main(args):
         with torch.no_grad():
             for data, *_ in dataloader:
                 if args.half:
-                    data = data.type(torch.float16).to(args.device)
-                else:
-                    data = data.to(args.device)
+                    data = data.type(torch.float16)
+                data = data.to(args.device if not args.use_openvino else 'cpu')
                 log_probs = model(data)
                 predictions.append(log_probs.exp().cpu().numpy().astype(np.float32))
 
@@ -90,4 +89,5 @@ def argparser():
     parser.add_argument("--poa", action="store_true", default=False)
     parser.add_argument("--shuffle", action="store_true", default=True)
     parser.add_argument("--min-coverage", default=0.5, type=float)
+    parser.add_argument("--use_openvino", action="store_true", default=False)
     return parser
diff --git a/bonito/openvino/mo_extension/front/onnx/conv2d.py b/bonito/openvino/mo_extension/front/onnx/conv2d.py
@@ -0,0 +1,31 @@
+# mo_extension/front/onnx/conv2d.py
+import numpy as np
+
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph, Node
+
+class Conv1dToConv2d(FrontReplacementSubgraph):
+    """
+    Front-phase rewrite that adds one extra dimension to matched Conv nodes
+
+    Matches a `Conv` op whose input 1 is a `Const` and extends the convolution
+    attributes and the constant by one entry each. Judging by the class name
+    this turns a 1D convolution into a 2D one -- TODO confirm against the
+    Model Optimizer attribute layout.
+    """
+    enabled = True
+
+    def pattern(self):
+        """
+        Describe the subgraph to match: a Const feeding input 1 of a Conv
+        """
+        nodes = [
+            ('conv', {'op': 'Conv'}),
+            ('weights', {'op': 'Const'}),
+        ]
+        edges = [
+            ('weights', 'conv', {'in': 1}),
+        ]
+        return {'nodes': nodes, 'edges': edges}
+
+    @staticmethod
+    def replace_sub_graph(graph: Graph, match: dict):
+        """
+        Extend the matched Conv node and its Const input by one dimension
+
+        :param graph: graph being transformed (not used directly here).
+        :param match: mapping with the matched 'conv' and 'weights' nodes.
+        """
+        conv_node = match['conv']
+        # A zero pad pair / unit stride / unit dilation is inserted at index 2.
+        conv_node['pad'] = np.insert(conv_node['pad'], 2, [0, 0], axis=0)
+        for attr in ('stride', 'dilation'):
+            conv_node[attr] = np.insert(conv_node[attr], 2, 1)
+        # The kernel gets a leading spatial extent of 1.
+        conv_node['kernel_spatial'] = np.insert(conv_node['kernel_spatial'], 0, 1)
+
+        # Keep the constant's recorded shape, its protobuf dims and its value
+        # in agreement: each gains a unit axis at position 2.
+        weights_node = match['weights']
+        weights_node['shape'] = np.insert(weights_node['shape'], 2, 1)
+        weights_node['pb'].dims.insert(2, 1)
+        weights_node['value'] = np.expand_dims(weights_node['value'], axis=2)
diff --git a/bonito/openvino/mo_extension/front/onnx/swish.py b/bonito/openvino/mo_extension/front/onnx/swish.py
@@ -0,0 +1,37 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.activation_ops import Sigmoid
+from extensions.ops.elementwise import Mul
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
+
+
+class Swish(FrontReplacementOp):
+    """
+    Replace a `Swish` op with `Mul(x, Sigmoid(x))`
+    """
+    op = "Swish"
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        """
+        Build the Mul/Sigmoid pair that stands in for `node`
+
+        :param graph: graph the new nodes are created in.
+        :param node: the `Swish` node being replaced.
+        :return: single-element list with the id of the new Mul node.
+        """
+        base_name = node.name
+        product = Mul(graph, {'name': base_name + '/mul_'}).create_node()
+        gate = Sigmoid(graph, {'name': base_name + '/sigmoid_'}).create_node()
+
+        # Whatever feeds input 0 of the Swish node drives both new nodes.
+        for consumer in (product, gate):
+            node.in_port(0).get_connection().get_source().connect(consumer.in_port(0))
+        # The Sigmoid result is the second operand of the multiplication.
+        gate.out_port(0).connect(product.in_port(1))
+
+        # Short form of the "explicit" return value [(product.id, 0)]
+        return [product.id]
diff --git a/bonito/openvino/mo_extension/front/onnx/transpose.py b/bonito/openvino/mo_extension/front/onnx/transpose.py
@@ -0,0 +1,16 @@
+# mo_extension/front/onnx/transpose.py
+import numpy as np
+
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph, Node
+from extensions.ops.transpose import Transpose
+from mo.front.extractor import FrontExtractorOp
+
+class Transpose2d(FrontExtractorOp):
+    """
+    Extractor for `Transpose` nodes that assigns a fixed axis order
+
+    The order [0, 3, 2, 1] is written unconditionally; no attribute of the
+    incoming node is read.
+    """
+    op = 'Transpose'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node: Node):
+        """
+        Set the fixed `order` attribute on `node` and return `cls.enabled`
+        """
+        fixed_order = [0, 3, 2, 1]
+        Transpose.update_node_stat(node, {'order': fixed_order})
+        return cls.enabled
diff --git a/bonito/openvino/model.py b/bonito/openvino/model.py
@@ -0,0 +1,114 @@
+import os
+import numpy as np
+import torch
+
+try:
+    from openvino.inference_engine import IECore, StatusCode
+except ImportError:
+    pass
+
+class OpenVINOModel:
+    """
+    Run a bonito model through the OpenVINO Inference Engine
+
+    The constructor exports the wrapped PyTorch model to ONNX and converts it
+    to OpenVINO IR when the IR files are not already present in `dirname`.
+    Calling the object runs every element of a batch as its own asynchronous
+    infer request and returns the gathered outputs as a torch tensor.
+    """
+
+    def __init__(self, model, half, dirname):
+        """
+        Load the IR for `model`, generating it first if it is missing
+
+        :param model: wrapped model; its `alphabet`, `config['model']` and
+            `decode` are used, and it is exported with `torch.onnx.export`.
+        :param half: select the FP16 IR (`<name>_fp16.xml/.bin`) instead of FP32.
+        :param dirname: directory holding the `.onnx`, `.xml` and `.bin` files.
+        :raises subprocess.CalledProcessError: if the Model Optimizer exits
+            with a non-zero status.
+        """
+        self.model = model
+        self.alphabet = model.alphabet
+        # Default target so the object is usable before `to()` is called;
+        # `to()` overrides it.
+        self.device = 'CPU'
+
+        onnx_path = os.path.join(dirname, model.config['model']) + '.onnx'
+        model_name = model.config['model'] + ('_fp16' if half else '')
+        xml_path, bin_path = [os.path.join(dirname, model_name) + ext for ext in ['.xml', '.bin']]
+        if not os.path.exists(xml_path) or not os.path.exists(bin_path):
+
+            # Convert to ONNX
+            if not os.path.exists(onnx_path):
+                inp = torch.randn(1, 1, 1000)  # Just dummy input shape. We will reshape model later
+                model.eval()
+                with torch.no_grad():
+                    torch.onnx.export(model, inp, onnx_path,
+                                    input_names=['input'],
+                                    output_names=['output'],
+                                    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
+
+            # Convert to IR
+            import mo_onnx
+            import subprocess
+            import sys
+            # Run the Model Optimizer with the current interpreter (does not
+            # rely on the script being executable) and raise if it fails, so a
+            # broken conversion is reported here rather than as a missing file
+            # in read_network below.
+            subprocess.check_call([sys.executable, mo_onnx.__file__,
+                                   '--input_model', onnx_path,
+                                   '--extension', os.path.join(os.path.dirname(__file__), 'mo_extension'),
+                                   '--keep_shape_ops',
+                                   '--model_name', model_name,
+                                   '--data_type', 'FP16' if half else 'FP32',
+                                   '--input_shape=[1,1,1,1000]',
+                                   '--output_dir', dirname])
+
+        self.ie = IECore()
+        self.net = self.ie.read_network(xml_path, bin_path)
+        # The executable network is created lazily on the first call, once the
+        # real input shape is known.
+        self.exec_net = None
+
+
+    def eval(self):
+        """
+        No-op kept for interface parity with torch modules; returns self
+        """
+        return self
+
+
+    def half(self):
+        """
+        No-op (precision is chosen in the constructor); returns self
+        """
+        return self
+
+
+    def to(self, device):
+        """
+        Remember the target device name (upper-cased) and return self
+        """
+        self.device = str(device).upper()
+        return self
+
+
+    def __call__(self, data):
+        """
+        Run inference on a batch and return the outputs as a torch tensor
+
+        :param data: batch of inputs indexed along axis 0; presumably shaped
+            (batch, 1, samples) like the dummy export input -- TODO confirm.
+        :raises Exception: if waiting on the infer requests does not return
+            `StatusCode.OK`, or no idle request can be obtained.
+        """
+        data = np.expand_dims(data, axis=2)  # 1D->2D
+        batch_size = data.shape[0]
+        if not self.exec_net:
+            inp_shape = list(data.shape)
+            inp_shape[0] = 1  # We will run the batch asynchronously
+            self.net.reshape({'input': inp_shape})
+            config = {}
+            if self.device == 'CPU':
+                config={'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
+            self.exec_net = self.ie.load_network(self.net, self.device,
+                                                 config=config, num_requests=0)
+
+        # List that maps infer requests to index of processed chunk from batch.
+        # -1 means that request has not been started yet.
+        infer_request_input_id = [-1] * len(self.exec_net.requests)
+        output = np.zeros([batch_size] + self.net.outputs['output'].shape[1:], dtype=np.float32)
+
+        for inp_id in range(batch_size):
+            # Get idle infer request
+            infer_request_id = self.exec_net.get_idle_request_id()
+            if infer_request_id < 0:
+                status = self.exec_net.wait(num_requests=1)
+                if status != StatusCode.OK:
+                    raise Exception("Wait for idle request failed!")
+                infer_request_id = self.exec_net.get_idle_request_id()
+                if infer_request_id < 0:
+                    raise Exception("Invalid request id!")
+
+            out_id = infer_request_input_id[infer_request_id]
+            request = self.exec_net.requests[infer_request_id]
+
+            # Copy output prediction of the chunk this request processed
+            # before, since starting it again will overwrite the blob.
+            if out_id != -1:
+                output[out_id] = request.output_blobs['output'].buffer
+
+            # Start this request on new data
+            infer_request_input_id[infer_request_id] = inp_id
+            request.async_infer({'input': data[inp_id]})
+
+        # Wait for the rest of requests
+        status = self.exec_net.wait()
+        if status != StatusCode.OK:
+            raise Exception("Wait for idle request failed!")
+        for infer_request_id, out_id in enumerate(infer_request_input_id):
+            # Requests that were never started in this call (batch smaller
+            # than the request pool) still carry -1; copying from them would
+            # overwrite output[-1] with a stale or unused buffer.
+            if out_id == -1:
+                continue
+            request = self.exec_net.requests[infer_request_id]
+            output[out_id] = request.output_blobs['output'].buffer
+
+        output = np.squeeze(output, axis=2)  # 2D->1D
+        return torch.tensor(output)
+
+
+    def decode(self, post, beamsize):
+        """
+        Delegate decoding to the wrapped model
+        """
+        return self.model.decode(post, beamsize=beamsize)
diff --git a/bonito/util.py b/bonito/util.py
@@ -10,6 +10,7 @@
 
 from bonito.model import Model
 from bonito_cuda_runtime import CuModel
+from bonito.openvino.model import OpenVINOModel
 
 import toml
 import torch
@@ -75,7 +76,7 @@ def init(seed, device):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    if device == "cpu": return
+    if not device.startswith('cuda'): return
     torch.backends.cudnn.enabled = True
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
@@ -86,7 +87,10 @@ def half_supported():
     """
     Returns whether FP16 is support on the GPU
     """
-    return get_device_capability()[0] >= 7
+    try:
+        return get_device_capability()[0] >= 7
+    except:
+        return False
 
 
 def phred(prob, scale=1.0, bias=0.0):
@@ -238,7 +242,7 @@ def load_data(shuffle=False, limit=None, directory=None, validation=False):
     return chunks, chunk_lengths, targets, target_lengths
 
 
-def load_model(dirname, device, weights=None, half=False, chunksize=0, use_rt=False):
+def load_model(dirname, device, weights=None, half=False, chunksize=0, use_rt=False, use_openvino=False):
     """
     Load a model from disk
     """
@@ -251,21 +255,25 @@ def load_model(dirname, device, weights=None, half=False, chunksize=0, use_rt=Fa
             raise FileNotFoundError("no model weights found in '%s'" % dirname)
         weights = max([int(re.sub(".*_([0-9]+).tar", "\\1", w)) for w in weight_files])
 
-    device = torch.device(device)
+    if not use_openvino:
+        device = torch.device(device)
     config = os.path.join(dirname, 'config.toml')
     weights = os.path.join(dirname, 'weights_%s.tar' % weights)
     model = Model(toml.load(config))
 
-    state_dict = torch.load(weights, map_location=device)
+    state_dict = torch.load(weights, map_location=device if not use_openvino else 'cpu')
     new_state_dict = OrderedDict()
     for k, v in state_dict.items():
         name = k.replace('module.', '')
         new_state_dict[name] = v
 
     model.load_state_dict(new_state_dict)
 
+    assert(not use_rt or not use_openvino)
     if use_rt:
         model = CuModel(model.config, chunksize, new_state_dict)
+    elif use_openvino:
+        model = OpenVINOModel(model, half, dirname)
 
     if half: model = model.half()
     model.eval()