Skip to content

Commit ebc7792

Browse files
committed
Intel OpenVINO backend
1 parent c8417b7 commit ebc7792

File tree

10 files changed

+296
-16
lines changed

10 files changed

+296
-16
lines changed

README.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Bonito
22

3-
[![PyPI version](https://badge.fury.io/py/ont-bonito.svg)](https://badge.fury.io/py/ont-bonito)
3+
[![PyPI version](https://badge.fury.io/py/ont-bonito.svg)](https://badge.fury.io/py/ont-bonito)
44
[![py36](https://img.shields.io/badge/python-3.6-brightgreen.svg)](https://img.shields.io/badge/python-3.6-brightgreen.svg)
55
[![py37](https://img.shields.io/badge/python-3.7-brightgreen.svg)](https://img.shields.io/badge/python-3.7-brightgreen.svg)
66
[![py38](https://img.shields.io/badge/python-3.8-brightgreen.svg)](https://img.shields.io/badge/python-3.8-brightgreen.svg)
@@ -35,6 +35,12 @@ The default `ont-bonito` package is built against CUDA 10.2 however CUDA 11.1 an
3535
$ pip install -f https://download.pytorch.org/whl/torch_stable.html ont-bonito-cuda111
3636
```
3737

38+
To optimize inference on CPU with Intel OpenVINO, use the `--use_openvino` flag:
39+
40+
```bash
41+
$ bonito basecaller dna_r9.4.1 --reference reference.mmi --use_openvino --device=cpu /data/reads > basecalls.sam
42+
```
43+
3844
## Modified Bases
3945

4046
Modified base calling is handled by [Remora](https://github.com/nanoporetech/remora).
@@ -54,7 +60,7 @@ $ bonito basecaller dna_r9.4.1 --save-ctc --reference reference.mmi /data/reads
5460
$ bonito train --directory /data/training/ctc-data /data/training/model-dir
5561
```
5662

57-
In addition to training a new model from scratch you can also easily fine tune one of the pretrained models.
63+
In addition to training a new model from scratch you can also easily fine tune one of the pretrained models.
5864

5965
```bash
6066
bonito train --epochs 1 --lr 5e-4 --pretrained dna_r10.4_e8.1_sup@v3.4 --directory /data/training/ctc-data /data/training/fine-tuned-model
@@ -67,7 +73,7 @@ $ bonito download --training
6773
$ bonito train /data/training/model-dir
6874
```
6975

70-
All training calls use Automatic Mixed Precision to speed up training. To disable this, set the `--no-amp` flag to True.
76+
All training calls use Automatic Mixed Precision to speed up training. To disable this, set the `--no-amp` flag to True.
7177

7278
## Developer Quickstart
7379

@@ -81,6 +87,11 @@ $ source venv3/bin/activate
8187
(venv3) $ python setup.py develop
8288
```
8389

90+
To build with OpenVINO backend:
91+
```bash
92+
(venv3) $ pip install -e .[openvino]
93+
```
94+
8495
## Interface
8596

8697
- `bonito view` - view a model architecture for a given `.toml` file and the number of parameters in the network.

bonito/cli/basecaller.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def main(args):
4040
batchsize=args.batchsize,
4141
quantize=args.quantize,
4242
use_koi=True,
43+
use_openvino=args.use_openvino,
4344
)
4445
except FileNotFoundError:
4546
sys.stderr.write(f"> error: failed to load {args.model_directory}\n")
@@ -172,4 +173,5 @@ def argparser():
172173
parser.add_argument("--batchsize", default=None, type=int)
173174
parser.add_argument("--max-reads", default=0, type=int)
174175
parser.add_argument('-v', '--verbose', action='count', default=0)
176+
parser.add_argument("--use_openvino", action="store_true", default=False)
175177
return parser

bonito/cli/evaluate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def main(args):
4545
seqs = []
4646

4747
print("* loading model", w)
48-
model = load_model(args.model_directory, args.device, weights=w)
48+
model = load_model(args.model_directory, args.device, weights=w, use_openvino=args.use_openvino)
4949

5050
print("* calling")
5151
t0 = time.perf_counter()
@@ -109,4 +109,5 @@ def argparser():
109109
parser.add_argument("--beamsize", default=5, type=int)
110110
parser.add_argument("--poa", action="store_true", default=False)
111111
parser.add_argument("--min-coverage", default=0.5, type=float)
112+
parser.add_argument("--use_openvino", action="store_true", default=False)
112113
return parser

bonito/crf/basecall.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ def compute_scores(model, batch, beam_width=32, beam_cut=100.0, scale=1.0, offse
2828
"""
2929
with torch.inference_mode():
3030
device = next(model.parameters()).device
31-
dtype = torch.float16 if half_supported() else torch.float32
31+
dtype = torch.float16 if device != torch.device('cpu') and half_supported() else torch.float32
3232
scores = model(batch.to(dtype).to(device))
3333
if reverse:
3434
scores = model.seqdist.reverse_complement(scores)
35+
# beam_search expects scores in FP16 precision
3536
sequence, qstring, moves = beam_search(
36-
scores, beam_width=beam_width, beam_cut=beam_cut,
37+
scores.to(torch.float16), beam_width=beam_width, beam_cut=beam_cut,
3738
scale=scale, offset=offset, blank_score=blank_score
3839
)
3940
return {

bonito/crf/model.py

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
import numpy as np
77
from bonito.nn import Module, Convolution, LinearCRFEncoder, Serial, Permute, layers, from_dict
88

9-
import seqdist.sparse
10-
from seqdist.ctc_simple import logZ_cupy, viterbi_alignments
9+
if torch.cuda.is_available():
10+
import seqdist.sparse
11+
from seqdist.ctc_simple import logZ_cupy, viterbi_alignments
1112
from seqdist.core import SequenceDist, Max, Log, semiring
1213

1314

@@ -21,6 +22,58 @@ def get_stride(m):
2122
return 1
2223

2324

25+
def logZ_fwd_cpu(Ms, idx, v0, vT, S):
    """Forward (alpha) recursion of the sparse logZ computation on CPU.

    Pure-PyTorch replacement for seqdist's CUDA kernel, used when no GPU
    is available.

    Args:
        Ms: transition scores, shape (T, N, C, NZ).
        idx: sparse connectivity indices, shape (C, NZ), integer.
        v0: initial state values, shape (N, C).
        vT: final state values, shape (N, C).
        S: semiring providing ``mul`` and ``sum``.

    Returns:
        Tuple of (logZ per batch element, shape (N,)) and the per-step
        products (shape (T, N, C, NZ)) saved for the backward pass.
    """
    T, N, C, NZ = Ms.shape
    # allocate with Ms's dtype/device instead of the torch defaults, so
    # float64 (or half) inputs are not silently down/up-cast
    Ms_grad = Ms.new_zeros(T, N, C, NZ)

    a = v0
    for t in range(T):
        s = S.mul(a[:, idx], Ms[t])
        a = S.sum(s, -1)
        Ms_grad[t] = s
    # NOTE(review): `a + vT` assumes a Log-style semiring where mul is `+`,
    # matching how the call sites use this function
    return S.sum(a + vT, dim=1), Ms_grad
35+
36+
37+
def logZ_bwd_cpu(Ms, idx, vT, S, K=1):
    """Backward (beta) recursion of the sparse logZ computation on CPU.

    Pure-PyTorch replacement for seqdist's ``bwd_scores_cupy``.

    Args:
        Ms: transition scores, shape (T, N, C, NZ).
        idx: sparse connectivity indices, shape (C, NZ).
        vT: final state values, shape (N, C).
        S: semiring providing ``mul`` and ``sum``.
        K: unrolling factor; only ``K=1`` is implemented on CPU.

    Returns:
        betas, shape (T + 1, N, C).

    Raises:
        NotImplementedError: if ``K != 1``.
    """
    # explicit check instead of `assert`, which disappears under `python -O`
    if K != 1:
        raise NotImplementedError("logZ_bwd_cpu only supports K=1")
    T, N, C, NZ = Ms.shape
    Ms = Ms.reshape(T, N, -1)
    # invert the sparse index map: position j of idx_T is where j occurs in idx
    idx_T = idx.flatten().argsort().to(dtype=torch.long).reshape(C, NZ)

    # allocate with Ms's dtype/device; every entry is overwritten below
    betas = Ms.new_empty(T + 1, N, C)

    a = vT
    betas[T] = a
    for t in reversed(range(T)):
        s = S.mul(a[:, idx_T // NZ], Ms[t, :, idx_T])
        a = S.sum(s, -1)
        betas[t] = a
    return betas
52+
53+
54+
class _LogZ(torch.autograd.Function):
    """Autograd wrapper around the CPU logZ forward/backward recursions.

    ``forward`` takes five inputs after ``ctx`` (Ms, idx, v0, vT, S), so
    ``backward`` must return exactly five gradients; only Ms is
    differentiable.
    """

    @staticmethod
    def forward(ctx, Ms, idx, v0, vT, S: semiring):
        idx = idx.to(dtype=torch.long, device=Ms.device)
        logZ, Ms_grad = logZ_fwd_cpu(Ms, idx, v0, vT, S)
        ctx.save_for_backward(Ms_grad, Ms, idx, vT)
        # the semiring is not a tensor, so it rides on ctx directly
        ctx.semiring = S
        return logZ

    @staticmethod
    def backward(ctx, grad):
        Ms_grad, Ms, idx, vT = ctx.saved_tensors
        S = ctx.semiring
        T, N, C, NZ = Ms.shape
        betas = logZ_bwd_cpu(Ms, idx, vT, S)
        Ms_grad = S.mul(Ms_grad, betas[1:, :, :, None])
        Ms_grad = S.dsum(Ms_grad.reshape(T, N, -1), dim=2).reshape(T, N, C, NZ)
        # exactly one gradient per forward input: Ms, idx, v0, vT, S
        # (the original returned six values, which PyTorch rejects with
        # "returned an incorrect number of gradients")
        return grad[None, :, None, None] * Ms_grad, None, None, None, None
72+
73+
def sparse_logZ(Ms, idx, v0, vT, S: semiring = Log):
    """CPU drop-in for ``seqdist.sparse.logZ``, dispatching to ``_LogZ``."""
    return _LogZ.apply(Ms, idx, v0, vT, S)
75+
76+
2477
class CTC_CRF(SequenceDist):
2578

2679
def __init__(self, state_len, alphabet):
@@ -43,7 +96,10 @@ def logZ(self, scores, S:semiring=Log):
4396
Ms = scores.reshape(T, N, -1, len(self.alphabet))
4497
alpha_0 = Ms.new_full((N, self.n_base**(self.state_len)), S.one)
4598
beta_T = Ms.new_full((N, self.n_base**(self.state_len)), S.one)
46-
return seqdist.sparse.logZ(Ms, self.idx, alpha_0, beta_T, S)
99+
if not Ms.device.index is None:
100+
return seqdist.sparse.logZ(Ms, self.idx, alpha_0, beta_T, S)
101+
else:
102+
return sparse_logZ(Ms, self.idx, alpha_0, beta_T, S)
47103

48104
def normalise(self, scores):
49105
return (scores - self.logZ(scores)[:, None] / len(scores))
@@ -58,7 +114,10 @@ def backward_scores(self, scores, S: semiring=Log):
58114
T, N, _ = scores.shape
59115
Ms = scores.reshape(T, N, -1, self.n_base + 1)
60116
beta_T = Ms.new_full((N, self.n_base**(self.state_len)), S.one)
61-
return seqdist.sparse.bwd_scores_cupy(Ms, self.idx, beta_T, S, K=1)
117+
if not Ms.device.index is None:
118+
return seqdist.sparse.bwd_scores_cupy(Ms, self.idx, beta_T, S, K=1)
119+
else:
120+
return logZ_bwd_cpu(Ms, self.idx, beta_T, S, K=1)
62121

63122
def compute_transition_probs(self, scores, betas):
64123
T, N, C = scores.shape

bonito/ctc/basecall.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ def compute_scores(model, batch):
3535
"""
3636
with torch.no_grad():
3737
device = next(model.parameters()).device
38-
chunks = batch.to(torch.half).to(device)
38+
chunks = batch.to(torch.half) if device != torch.device('cpu') and half_supported() else batch
39+
chunks = chunks.to(device)
3940
probs = permute(model(chunks), 'TNC', 'NTC')
4041
return probs.cpu().to(torch.float32)
4142

bonito/openvino/loader.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import torch.nn as nn
2+
3+
4+
def convert_to_2d(model):
    """Recursively rewrite the 1D layers of ``model`` as 2D equivalents.

    OpenVINO's IR works best with 2D (NCHW) operations, so every
    ``Conv1d`` / ``BatchNorm1d`` is replaced in place by a ``Conv2d`` /
    ``BatchNorm2d`` acting on a unit-height axis, and ``Permute`` dims are
    extended from rank 3 to rank 4 (the new trailing axis 3 follows the
    old axis 2).

    Args:
        model: an ``nn.Module``; modified in place.

    Raises:
        ValueError: if a ``Permute`` layer references a dim greater than 2.
    """
    for name, layer in model.named_children():
        kind = layer.__class__.__name__
        if kind == 'Conv1d':
            conv2d = nn.Conv2d(
                layer.in_channels, layer.out_channels,
                (1, layer.kernel_size[0]), (1, layer.stride[0]),
                (0, layer.padding[0]), (1, layer.dilation[0]),
                layer.groups, layer.bias is not None, layer.padding_mode,
            )
            params = layer.state_dict()
            # weights gain a unit kernel-height axis: (O, I, K) -> (O, I, 1, K)
            params['weight'] = params['weight'].unsqueeze(2)
            conv2d.load_state_dict(params)
            setattr(model, name, conv2d)
        elif kind == 'BatchNorm1d':
            # carry over every hyper-parameter, not just eps: otherwise an
            # affine-less layer fails to load its state_dict and a custom
            # momentum is silently reset to the default
            bn2d = nn.BatchNorm2d(layer.num_features, layer.eps,
                                  layer.momentum, layer.affine,
                                  layer.track_running_stats)
            bn2d.load_state_dict(layer.state_dict())
            bn2d.eval()
            setattr(model, name, bn2d)
        elif kind == 'Permute':
            # 1D to 2D: i.e. (2, 0, 1) -> (2, 3, 0, 1)
            dims_2d = []
            for d in layer.dims:
                if d > 2:
                    raise ValueError(f'unexpected permute dim {d} in 1D model')
                dims_2d.append(d)
                if d == 2:
                    dims_2d.append(3)
            layer.dims = dims_2d
        else:
            # containers (Sequential etc.) and unknown modules: recurse
            convert_to_2d(layer)

bonito/openvino/model.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import os
2+
import io
3+
import numpy as np
4+
import torch
5+
6+
try:
7+
from openvino.inference_engine import IECore, StatusCode
8+
from .loader import convert_to_2d
9+
except ImportError:
10+
pass
11+
12+
13+
def load_openvino_model(model, dirname):
    """Wrap a loaded bonito model in the matching OpenVINO backend class."""
    package = model.config['model']['package']
    if package == 'bonito.crf':
        return OpenVINOCRFModel(model, dirname)
    if package == 'bonito.ctc':
        return OpenVINOCTCModel(model, dirname)
    raise Exception('Unknown model configuration: ' + package)
21+
22+
23+
class OpenVINOModel:
    """Common OpenVINO inference wrapper around a bonito torch model.

    Mirrors the subset of the torch model interface the basecaller uses
    (``eval``, ``half``, ``to``, plus ``__call__`` in the subclasses)
    while running the network through the OpenVINO Inference Engine.
    """

    def __init__(self, model, dirname):
        self.model = model
        self.alphabet = model.alphabet
        self.parameters = model.parameters
        self.stride = model.stride
        self.net = None
        self.exec_net = None
        self.dirname = dirname
        self.ie = IECore()

    def eval(self):
        # an inference-engine network has no train/eval modes
        pass

    def half(self):
        # precision is handled by the OpenVINO plugin; nothing to convert
        return self

    @property
    def config(self):
        return self.model.config

    def to(self, device):
        self.device = str(device).upper()
        # return self for torch-style chaining (model = model.to(device)),
        # consistent with half()
        return self

    def init_model(self, model, inp_shape):
        """Initialize the executable network. Call this method once.

        Loads an IR (model.xml / model.bin) from ``self.dirname`` when it
        exists, otherwise exports ``model`` to ONNX in memory and imports
        that buffer.
        """
        # First, we try to check if there is IR on disk. If not - load model in runtime
        xml_path, bin_path = [os.path.join(self.dirname, 'model') + ext for ext in ['.xml', '.bin']]
        if os.path.exists(xml_path) and os.path.exists(bin_path):
            self.net = self.ie.read_network(xml_path, bin_path)
        else:
            # Convert model to ONNX buffer
            buf = io.BytesIO()
            inp = torch.randn(inp_shape)
            torch.onnx.export(model, inp, buf, input_names=['input'], output_names=['output'],
                              opset_version=11)

            # Import network from memory buffer
            self.net = self.ie.read_network(buf.getvalue(), b'', init_from_buffer=True)

        # Load model to device
        config = {}
        if self.device == 'CPU':
            config = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
        self.exec_net = self.ie.load_network(self.net, self.device,
                                             config=config, num_requests=0)

    def process(self, data):
        """Run a batch of chunks through the network asynchronously.

        Chunks are dispatched one at a time to a pool of infer requests;
        each request's result is harvested just before the request is
        reused, and the stragglers are collected after the loop.
        """
        data = data.float()
        batch_size = data.shape[0]
        inp_shape = list(data.shape)
        inp_shape[0] = 1  # We will run the batch asynchronously

        # List that maps infer requests to index of processed chunk from batch.
        # -1 means that request has not been started yet.
        infer_request_input_id = [-1] * len(self.exec_net.requests)
        out_shape = self.net.outputs['output'].shape
        # CTC network produces 1xWxNxC
        output = np.zeros([out_shape[-3], batch_size, out_shape[-1]], dtype=np.float32)

        for inp_id in range(batch_size):
            # Get idle infer request
            infer_request_id = self.exec_net.get_idle_request_id()
            if infer_request_id < 0:
                status = self.exec_net.wait(num_requests=1)
                if status != StatusCode.OK:
                    raise Exception("Wait for idle request failed!")
                infer_request_id = self.exec_net.get_idle_request_id()
                if infer_request_id < 0:
                    raise Exception("Invalid request id!")

            out_id = infer_request_input_id[infer_request_id]
            request = self.exec_net.requests[infer_request_id]

            # Copy output prediction left over from this request's previous chunk
            if out_id != -1:
                output[:, out_id:out_id + 1] = request.output_blobs['output'].buffer

            # Start this request on new data
            infer_request_input_id[infer_request_id] = inp_id
            request.async_infer({'input': data[inp_id]})
            # (the original incremented inp_id here; that was dead code —
            # inp_id is the loop variable and is reassigned each iteration)

        # Wait for the rest of requests
        status = self.exec_net.wait()
        if status != StatusCode.OK:
            raise Exception("Wait for idle request failed!")
        for infer_request_id, out_id in enumerate(infer_request_input_id):
            if out_id == -1:
                continue
            request = self.exec_net.requests[infer_request_id]
            output[:, out_id:out_id + 1] = request.output_blobs['output'].buffer

        return torch.tensor(output)
125+
126+
127+
class OpenVINOCTCModel(OpenVINOModel):
    """OpenVINO backend for CTC models (network converted to 2D ops)."""

    def __init__(self, model, dirname):
        super().__init__(model, dirname)

    def __call__(self, data):
        # lazily convert the torch model to 2D and build the executable
        # network on the first batch (shape[-1] is the chunk length either way)
        if self.exec_net is None:
            convert_to_2d(self.model)
            self.init_model(self.model, [1, 1, 1, data.shape[-1]])
        # 1D->2D: insert a unit height axis before inference
        return self.process(data.unsqueeze(2))

    def decode(self, x, beamsize=5, threshold=1e-3, qscores=False, return_path=False):
        """Delegate decoding to the wrapped torch model."""
        return self.model.decode(
            x, beamsize=beamsize, threshold=threshold,
            qscores=qscores, return_path=return_path,
        )
145+
146+
147+
class OpenVINOCRFModel(OpenVINOModel):
    """OpenVINO backend for CRF (linear-CRF encoder) models."""

    def __init__(self, model, dirname):
        super().__init__(model, dirname)
        self.seqdist = model.seqdist

    def __call__(self, data):
        # lazily build the executable network from the encoder on first use
        if self.exec_net is None:
            self.init_model(self.model.encoder, [1, 1, data.shape[-1]])
        return self.process(data)

    def decode(self, x):
        """Delegate per-read decoding to the wrapped torch model."""
        return self.model.decode(x)

    def decode_batch(self, x):
        """Delegate batched decoding to the wrapped torch model."""
        return self.model.decode_batch(x)

0 commit comments

Comments
 (0)