
Commit c8bf72c

Add OpenVINO backend
1 parent 07f885e commit c8bf72c

8 files changed: +236 −9 lines changed

README.md

Lines changed: 7 additions & 0 deletions
@@ -28,6 +28,13 @@ $ source venv3/bin/activate
 (venv3) $ bonito download --models --latest
 ```
 
+To optimize inference on CPU with Intel OpenVINO:
+
+```bash
+(venv3) $ export LD_LIBRARY_PATH=$(pwd)/venv3/lib:$LD_LIBRARY_PATH
+(venv3) $ bonito evaluate dna_r9.4.1 --use_openvino --device=cpu
+```
+
 ## Training your own model
 
 To train a model using your own reads, first basecall the reads with the additional `--save-ctc` flag and use the output directory as the input directory for training.

bonito/cli/basecaller.py

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,7 @@ def main(args):
         exit(1)
 
     sys.stderr.write("> loading model\n")
-    model = load_model(args.model_directory, args.device, weights=int(args.weights))
+    model = load_model(args.model_directory, args.device, weights=int(args.weights), use_openvino=args.use_openvino)
 
     if args.reference:
         sys.stderr.write("> loading reference\n")
@@ -83,6 +83,7 @@ def argparser():
     parser.add_argument("--skip", action="store_true", default=False)
     parser.add_argument("--fastq", action="store_true", default=False)
     parser.add_argument("--save-ctc", action="store_true", default=False)
+    parser.add_argument("--use_openvino", action="store_true", default=False)
     parser.add_argument("--ctc-min-coverage", default=0.9, type=float)
     parser.add_argument("--ctc-min-accuracy", default=0.9, type=float)
     return parser

bonito/cli/evaluate.py

Lines changed: 2 additions & 1 deletion
@@ -35,7 +35,7 @@ def main(args):
         seqs = []
 
         print("* loading model", w)
-        model = load_model(args.model_directory, args.device, weights=w)
+        model = load_model(args.model_directory, args.device, weights=w, use_openvino=args.use_openvino)
 
         print("* calling")
         t0 = time.perf_counter()
@@ -93,4 +93,5 @@ def argparser():
     parser.add_argument("--poa", action="store_true", default=False)
     parser.add_argument("--shuffle", action="store_true", default=True)
     parser.add_argument("--min-coverage", default=0.5, type=float)
+    parser.add_argument("--use_openvino", action="store_true", default=False)
     return parser

bonito/ctc/model.py

Lines changed: 3 additions & 2 deletions
@@ -3,7 +3,7 @@
 """
 
 import numpy as np
-from bonito.nn import Permute, activations
+from bonito.nn import Add, Permute, activations
 from torch.nn.functional import log_softmax
 from torch.nn import Module, ModuleList, Sequential, Conv1d, BatchNorm1d, Dropout
 
@@ -121,6 +121,7 @@ def __init__(self, in_channels, out_channels, activation, repeat=5, kernel_size=
 
         self.use_res = residual
         self.conv = ModuleList()
+        self.add = Add()
 
         _in_channels = in_channels
         padding = self.get_padding(kernel_size[0], stride[0], dilation[0])
@@ -178,7 +179,7 @@ def forward(self, x):
         for layer in self.conv:
             _x = layer(_x)
         if self.use_res:
-            _x = _x + self.residual(x)
+            _x = self.add(_x, self.residual(x))
         return self.activation(_x)
 
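The switch from the bare `+` to an explicit `Add` module is what makes the residual connection visible to the OpenVINO converter below: PyTorch forward hooks fire only on `nn.Module` calls, so a plain tensor addition would never reach `forward_hook` in `bonito/openvino/loader.py`. The `Add` module itself lives in `bonito.nn` and is not shown in this diff; a minimal sketch of what it plausibly looks like:

```python
from torch.nn import Module

class Add(Module):
    """Element-wise addition as a Module, so forward hooks can observe it."""
    def forward(self, x, y):
        return x + y
```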
bonito/openvino/loader.py

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+# This script provides a method that builds an OpenVINO network at runtime
+import numpy as np
+from openvino.inference_engine import IECore, IENetwork
+
+import ngraph.opset4 as ng
+from ngraph.impl.op import Parameter
+from ngraph.impl import Function, Shape, Type
+
+import torch
+from torch.autograd import Variable
+
+
+# Maps a tensor's data_ptr() to the nGraph node that produced it
+nodes = {}
+out = None
+
+def forward_hook(self, inputs, output):
+    global out
+    layer_type = self.__class__.__name__
+
+    params = [value.numpy() for value in self.state_dict().values()]
+
+    inp = nodes[inputs[0].data_ptr()]
+    if layer_type == 'Conv1d':
+        # 1D convolutions are mapped to 2D ones with a unit height axis
+        weights = np.expand_dims(params[0], axis=2)
+        if self.groups == 1:
+            out = ng.convolution(inp, weights,
+                                 [1, self.stride[0]],
+                                 [0, self.padding[0]],
+                                 [0, self.padding[0]],
+                                 [1, self.dilation[0]])
+        else:
+            weights = weights.reshape(self.groups, weights.shape[0] // self.groups, weights.shape[1], weights.shape[2], weights.shape[3])
+            out = ng.group_convolution(inp, weights,
+                                       [1, self.stride[0]],
+                                       [0, self.padding[0]],
+                                       [0, self.padding[0]],
+                                       [1, self.dilation[0]])
+        if len(params) > 1:
+            assert(len(params) == 2)
+            bias = params[1].reshape(1, params[1].shape[0], 1, 1)
+            out = ng.add(out, bias)
+
+    elif layer_type == 'BatchNorm1d':
+        out = ng.batch_norm_inference(inp, params[0], params[1], params[2], params[3], self.eps)
+    elif layer_type == 'Swish':
+        out = ng.swish(inp)
+    elif layer_type == 'Add':
+        out = ng.add(inp, nodes[inputs[1].data_ptr()])
+    elif layer_type == 'Dropout':
+        return
+    elif layer_type == 'Permute':
+        order = []
+        # 1D to 2D: i.e. (2, 0, 1) -> (2, 3, 0, 1)
+        for d in self.dims:
+            assert(d <= 2)
+            order.append(d)
+            if d == 2:
+                order.append(3)
+        out = ng.transpose(inp, order)
+    else:
+        raise Exception('Unknown layer type: ' + layer_type)
+
+    nodes[output.data_ptr()] = out
+
+
+def sanity_check(net, inp, ref):
+    ie = IECore()
+    exec_net = ie.load_network(net, 'CPU')
+    ie_out = exec_net.infer({'input': inp.numpy()})
+    ie_out = next(iter(ie_out.values()))
+
+    ref = ref.numpy().reshape(ie_out.shape)
+    diff = np.max(np.abs(ie_out - ref))
+    print('PyTorch / OpenVINO diff:', diff)
+    print('Reference values range: [{}, {}]'.format(np.min(ref), np.max(ref)))
+    if diff > 1.1e-4:
+        raise Exception('Sanity check failed with diff', diff)
+
+
+def torch2openvino(model):
+    with torch.no_grad():
+        model.eval()
+        hooks = []
+        for module in model.modules():
+            # Hook only leaf modules (those with no children)
+            if len([m for m in module.modules()]) != 1:
+                continue
+            hooks.append(module.register_forward_hook(forward_hook))
+
+        # Just a dummy input to make a forward pass
+        inp = Variable(torch.randn([1, 1, 1000]))
+
+        param = Parameter(Type.f32, Shape([1, 1, 1, 1000]))
+        nodes[inp.data_ptr()] = param
+        ref = model(inp)
+
+        for hook in hooks:
+            hook.remove()
+
+        out_node = ng.log(ng.softmax(out, axis=3))
+
+        param.set_friendly_name('input')
+        out_node.set_friendly_name('output')
+        func = Function([out_node], [param], '')
+
+        caps = Function.to_capsule(func)
+        net = IENetwork(caps)
+
+        # Uncomment to perform a conversion check
+        # sanity_check(net, inp, ref)
+
+        return net
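`torch2openvino` converts the network by tracing: it registers a forward hook on every leaf module, pushes a dummy chunk through the model, and emits one nGraph op per layer as the hooks fire, keying every intermediate tensor by its `data_ptr()` so each op can find its inputs. A minimal sketch of the same hook-tracing pattern with no OpenVINO dependency (the toy model and the `trace` structure are illustrative):

```python
import torch
from torch import nn

trace = {}  # data_ptr() -> description of the op chain that produced the tensor

def hook(module, inputs, output):
    # Mirror loader.py: look up the node for the input, record one for the output
    src = trace.get(inputs[0].data_ptr(), '?')
    trace[output.data_ptr()] = '{} -> {}'.format(src, module.__class__.__name__)

model = nn.Sequential(nn.Conv1d(1, 4, 5, padding=2), nn.BatchNorm1d(4), nn.ReLU())
model.eval()

# Hook leaf modules only, exactly as torch2openvino filters them
handles = [m.register_forward_hook(hook)
           for m in model.modules() if len(list(m.modules())) == 1]

x = torch.randn(1, 1, 1000)
trace[x.data_ptr()] = 'input'
with torch.no_grad():
    y = model(x)
for h in handles:
    h.remove()

print(trace[y.data_ptr()])  # input -> Conv1d -> BatchNorm1d -> ReLU
```

In the real loader the recorded values are nGraph nodes rather than strings, and the final `log(softmax(...))` is appended manually because bonito applies it functionally in the model's forward rather than as a module.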

bonito/openvino/model.py

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+import os
+import numpy as np
+import torch
+
+try:
+    from openvino.inference_engine import IECore, StatusCode
+    from .loader import torch2openvino
+except ImportError:
+    pass
+
+class OpenVINOModel:
+
+    def __init__(self, model, half, dirname):
+        self.model = model
+        self.alphabet = model.alphabet
+        self.parameters = model.parameters
+        self.stride = model.stride
+
+        model_name = 'model' + ('_fp16' if half else '')
+        xml_path, bin_path = [os.path.join(dirname, model_name) + ext for ext in ['.xml', '.bin']]
+        self.ie = IECore()
+        if os.path.exists(xml_path) and os.path.exists(bin_path):
+            self.net = self.ie.read_network(xml_path, bin_path)
+        else:
+            self.net = torch2openvino(model)
+        self.exec_net = None
+
+
+    def eval(self):
+        pass
+
+
+    def half(self):
+        return self
+
+
+    def to(self, device):
+        self.device = str(device).upper()
+
+
+    def __call__(self, data):
+        data = np.expand_dims(data, axis=2)  # 1D->2D
+        batch_size = data.shape[0]
+        inp_shape = list(data.shape)
+        inp_shape[0] = 1  # We will run the batch asynchronously
+        if not self.exec_net or self.exec_net.input_info['input'].tensor_desc.dims != inp_shape:
+            self.net.reshape({'input': inp_shape})
+            config = {}
+            if self.device == 'CPU':
+                config = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
+            self.exec_net = self.ie.load_network(self.net, self.device,
+                                                 config=config, num_requests=0)
+
+        # List that maps infer requests to the index of the chunk being processed.
+        # -1 means the request has not been started yet.
+        infer_request_input_id = [-1] * len(self.exec_net.requests)
+        output = np.zeros([batch_size] + self.net.outputs['output'].shape[1:], dtype=np.float32)
+
+        for inp_id in range(batch_size):
+            # Get an idle infer request
+            infer_request_id = self.exec_net.get_idle_request_id()
+            if infer_request_id < 0:
+                status = self.exec_net.wait(num_requests=1)
+                if status != StatusCode.OK:
+                    raise Exception("Wait for idle request failed!")
+                infer_request_id = self.exec_net.get_idle_request_id()
+                if infer_request_id < 0:
+                    raise Exception("Invalid request id!")
+
+            out_id = infer_request_input_id[infer_request_id]
+            request = self.exec_net.requests[infer_request_id]
+
+            # Copy the output prediction from a previously finished request
+            if out_id != -1:
+                output[out_id] = request.output_blobs['output'].buffer
+
+            # Start this request on new data
+            infer_request_input_id[infer_request_id] = inp_id
+            request.async_infer({'input': data[inp_id]})
+
+        # Wait for the rest of the requests
+        status = self.exec_net.wait()
+        if status != StatusCode.OK:
+            raise Exception("Wait for requests failed!")
+        for infer_request_id, out_id in enumerate(infer_request_input_id):
+            if out_id == -1:
+                continue
+            request = self.exec_net.requests[infer_request_id]
+            output[out_id] = request.output_blobs['output'].buffer
+
+        output = np.squeeze(output, axis=2)  # 2D->1D
+        output = output.transpose(1, 0, 2)  # Model should produce WNC (width, batch, features)
+        return torch.tensor(output)
+
+
+    def decode(self, x, beamsize=5, threshold=1e-3, qscores=False, return_path=False):
+        return self.model.decode(x, beamsize=beamsize, threshold=threshold,
+                                 qscores=qscores, return_path=return_path)
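`OpenVINOModel` duck-types the interface of the PyTorch model (`eval`, `half`, `to`, `__call__`, `decode`), so the rest of bonito needs no changes, and it bridges two layout conventions: chunks arrive as NCW, are lifted to 4D NC1W for the 2D OpenVINO graph, and posteriors come back as WNC for the decoder. A small numpy sketch of that shape plumbing (all sizes are illustrative):

```python
import numpy as np

batch, width, features = 32, 334, 5   # e.g. 1000-sample chunks, stride 3, 5 CTC labels

chunks = np.random.randn(batch, 1, 1000).astype(np.float32)  # NCW
inp = np.expand_dims(chunks, axis=2)                         # NC1W: unit height for 2D ops
assert inp.shape == (batch, 1, 1, 1000)

# Each infer request produces (1, W, 1, C); results are gathered per batch element
gathered = np.zeros((batch, width, 1, features), dtype=np.float32)

out = np.squeeze(gathered, axis=2)    # (N, W, C): drop the unit axis again
out = out.transpose(1, 0, 2)          # (W, N, C): the WNC layout the decoder expects
assert out.shape == (width, batch, features)
```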

bonito/util.py

Lines changed: 9 additions & 4 deletions
@@ -17,6 +17,7 @@
 import parasail
 import numpy as np
 from torch.cuda import get_device_capability
+from bonito.openvino.model import OpenVINOModel
 
 try:
     from claragenomics.bindings import cuda
@@ -44,7 +45,7 @@ def init(seed, device):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    if device == "cpu": return
+    if not device.startswith('cuda'): return
     torch.backends.cudnn.enabled = True
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
@@ -263,7 +264,7 @@ def match_names(state_dict, model):
     return OrderedDict([(k, remap[k]) for k in state_dict.keys()])
 
 
-def load_model(dirname, device, weights=None, half=None, chunksize=0):
+def load_model(dirname, device, weights=None, half=None, chunksize=0, use_openvino=False):
     """
     Load a model from disk
     """
@@ -276,14 +277,15 @@ def load_model(dirname, device, weights=None, half=None, chunksize=0):
         raise FileNotFoundError("no model weights found in '%s'" % dirname)
     weights = max([int(re.sub(".*_([0-9]+).tar", "\\1", w)) for w in weight_files])
 
-    device = torch.device(device)
+    if not use_openvino:
+        device = torch.device(device)
     config = toml.load(os.path.join(dirname, 'config.toml'))
     weights = os.path.join(dirname, 'weights_%s.tar' % weights)
 
     Model = load_symbol(config, "Model")
     model = Model(config)
 
-    state_dict = torch.load(weights, map_location=device)
+    state_dict = torch.load(weights, map_location=device if not use_openvino else 'cpu')
     state_dict = {k2: state_dict[k1] for k1, k2 in match_names(state_dict, model).items()}
     new_state_dict = OrderedDict()
     for k, v in state_dict.items():
@@ -292,6 +294,9 @@ def load_model(dirname, device, weights=None, half=None, chunksize=0):
 
     model.load_state_dict(new_state_dict)
 
+    if use_openvino:
+        model = OpenVINOModel(model, half, dirname)
+
     if half is None:
         half = half_supported()

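With these changes OpenVINO is a pure opt-in path through the existing loader: `load_model` builds the PyTorch model and loads its weights exactly as before (always onto CPU in this mode), and only then wraps it. A hedged usage sketch (the model directory path is a placeholder):

```python
from bonito.util import load_model

# Returns an OpenVINOModel wrapper that behaves like the PyTorch model
model = load_model('/path/to/dna_r9.4.1', device='cpu', use_openvino=True)
```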
requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 mappy==2.17
 toml==0.10.0
 tqdm==4.31.1
-numpy<=1.18.5
+numpy<=1.16.3
 torch>=1.1.0,<=1.5
 optuna==1.1.0
 parasail==1.2
@@ -12,3 +12,4 @@ ont-fast5-api==3.1.6
 fast-ctc-decode==0.2.5
 #bonito-cuda-runtime==0.0.2a2
 #pyclaragenomics-cuda-10-0==0.4.2
+openvino-python==2021.1
