Skip to content

Commit ad67baa

Browse files
authored
Merge pull request #163 from CoinCheung/dev
more test on the recent modifications
2 parents 13ece2a + e40b825 commit ad67baa

File tree

7 files changed

+76
-46
lines changed

7 files changed

+76
-46
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ My implementation of [BiSeNetV1](https://arxiv.org/abs/1808.00897) and [BiSeNetV
66
mIOUs and fps on cityscapes val set:
77
| none | ss | ssc | msf | mscf | fps(fp16/fp32) | link |
88
|------|:--:|:---:|:---:|:----:|:---:|:----:|
9-
| bisenetv1 | 75.10 | 76.90 | 77.22 | 78.73 | 60/19 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v1_city.pth) |
10-
| bisenetv2 | 74.95 | 75.58 | 76.53 | 77.08 | 50/16 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v2_city.pth) |
9+
| bisenetv1 | 75.44 | 76.94 | 77.45 | 78.86 | 68/23 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v1_city_new.pth) |
10+
| bisenetv2 | 74.95 | 75.58 | 76.53 | 77.08 | 59/21 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v2_city.pth) |
11+
1112

1213
mIOUs on cocostuff val2017 set:
1314
| none | ss | ssc | msf | mscf | link |
1415
|------|:--:|:---:|:---:|:----:|:----:|
15-
| bisenetv1 | 31.89 | 31.62 | 32.81 | 32.72 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v1_coco.pth) |
16+
| bisenetv1 | 31.49 | 31.42 | 32.46 | 32.55 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v1_coco_new.pth) |
1617
| bisenetv2 | 30.49 | 30.55 | 31.81 | 31.73 | [download](https://github.com/CoinCheung/BiSeNet/releases/download/0.0.0/model_final_v2_coco.pth) |
1718

1819
> Where **ss** means single-scale evaluation, **ssc** means single-scale crop evaluation, **msf** means multi-scale evaluation with flip augmentation, and **mscf** means multi-scale crop evaluation with flip augmentation. The eval scales and crop size used for multi-scale evaluation can be found in [configs](./configs/).

dist_train.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

lib/models/bisenetv1.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,16 @@ def __init__(self, in_chan, out_chan, *args, **kwargs):
9999
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
100100
self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False)
101101
self.bn_atten = BatchNorm2d(out_chan)
102-
self.sigmoid_atten = nn.Sigmoid()
102+
# self.sigmoid_atten = nn.Sigmoid()
103103
self.init_weight()
104104

105105
def forward(self, x):
106106
feat = self.conv(x)
107107
atten = torch.mean(feat, dim=(2, 3), keepdim=True)
108108
atten = self.conv_atten(atten)
109109
atten = self.bn_atten(atten)
110-
atten = self.sigmoid_atten(atten)
110+
# atten = self.sigmoid_atten(atten)
111+
atten = atten.sigmoid()
111112
out = torch.mul(feat, atten)
112113
return out
113114

@@ -206,30 +207,39 @@ class FeatureFusionModule(nn.Module):
206207
def __init__(self, in_chan, out_chan, *args, **kwargs):
207208
super(FeatureFusionModule, self).__init__()
208209
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
209-
self.conv1 = nn.Conv2d(out_chan,
210-
out_chan//4,
211-
kernel_size = 1,
212-
stride = 1,
213-
padding = 0,
214-
bias = False)
215-
self.conv2 = nn.Conv2d(out_chan//4,
210+
## use conv-bn instead of 2 layer mlp, so that tensorrt 7.2.3.4 can work for fp16
211+
self.conv = nn.Conv2d(out_chan,
216212
out_chan,
217213
kernel_size = 1,
218214
stride = 1,
219215
padding = 0,
220216
bias = False)
221-
self.relu = nn.ReLU(inplace=True)
222-
self.sigmoid = nn.Sigmoid()
217+
self.bn = nn.BatchNorm2d(out_chan)
218+
# self.conv1 = nn.Conv2d(out_chan,
219+
# out_chan//4,
220+
# kernel_size = 1,
221+
# stride = 1,
222+
# padding = 0,
223+
# bias = False)
224+
# self.conv2 = nn.Conv2d(out_chan//4,
225+
# out_chan,
226+
# kernel_size = 1,
227+
# stride = 1,
228+
# padding = 0,
229+
# bias = False)
230+
# self.relu = nn.ReLU(inplace=True)
223231
self.init_weight()
224232

225233
def forward(self, fsp, fcp):
226234
fcat = torch.cat([fsp, fcp], dim=1)
227235
feat = self.convblk(fcat)
228236
atten = torch.mean(feat, dim=(2, 3), keepdim=True)
229-
atten = self.conv1(atten)
230-
atten = self.relu(atten)
231-
atten = self.conv2(atten)
232-
atten = self.sigmoid(atten)
237+
atten = self.conv(atten)
238+
atten = self.bn(atten)
239+
# atten = self.conv1(atten)
240+
# atten = self.relu(atten)
241+
# atten = self.conv2(atten)
242+
atten = atten.sigmoid()
233243
feat_atten = torch.mul(feat, atten)
234244
feat_out = feat_atten + feat
235245
return feat_out

tensorrt/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ set(CMAKE_CXX_FLAGS "-std=c++14 -O1")
66

77

88
link_directories(/usr/local/cuda/lib64)
9+
# set(OpenCV_DIR "/opt/opencv/lib/cmake/opencv4")
910

1011

1112
find_package(CUDA REQUIRED)

tensorrt/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,5 @@ Likewise, you do not need to worry about this anymore with 7.2.3.4.
6262

6363
3. The speed (fps) is tested on a single NVIDIA Tesla T4 GPU with `batchsize=1` and `cropsize=(1024,2048)`. Please note that the T4 is almost 2 times slower than a 2080 Ti, so you should evaluate the speed on your own platform and with your own cropsize. Also note that performance will be affected if your GPU is concurrently working on other tasks, so please make sure no other program is running on your GPU when you test the speed.
6464

65-
4. ~On my platform, after compiling with tensorrt, the model size of bisenetv1 is 33Mb(fp16) and 133Mb(fp32), and the size of bisenetv2 is 29Mb(fp16) and 54Mb(fp32). However, the fps of bisenetv1 is 60(fp16) and 19(fp32), while the fps of bisenetv2 is 50(fp16) and 16(fp32). It is obvious that bisenetv2 has fewer parameters than bisenetv1, but the speed is otherwise. I am not sure whether it is because tensorrt has worse optimization strategy in some ops used in bisenetv2(such as depthwise convolution) or because of the limitation of the gpu on different ops. Please tell me if you have better idea on this.~
66-
Not tested.
65+
4. On my platform, after compiling with tensorrt, the model size of bisenetv1 is 29MB (fp16) and 128MB (fp32), and the size of bisenetv2 is 16MB (fp16) and 42MB (fp32). However, the fps of bisenetv1 is 68 (fp16) and 23 (fp32), while the fps of bisenetv2 is 59 (fp16) and 21 (fp32). bisenetv2 clearly has fewer parameters than bisenetv1, yet it runs slower. I am not sure whether this is because tensorrt has a worse optimization strategy for some ops used in bisenetv2 (such as depthwise convolution) or because of the gpu's limitations on different ops. Please tell me if you have a better idea about this.
6766

tools/conver_to_trt.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
parse = argparse.ArgumentParser()
1616
parse.add_argument('--config', dest='config', type=str, default='configs/bisenetv2.py',)
1717
parse.add_argument('--weight-path', type=str, default='./res/model_final.pth',)
18+
parse.add_argument('--fp16', action='store_true')
1819
parse.add_argument('--outpath', dest='out_pth', type=str,
1920
default='model.trt')
2021
args = parse.parse_args()
@@ -23,12 +24,16 @@
2324
cfg = set_cfg_from_file(args.config)
2425
if cfg.use_sync_bn: cfg.use_sync_bn = False
2526

26-
net = model_factory[cfg.model_type](19, output_aux=False).cuda()
27-
net.load_state_dict(torch.load(args.weight_pth))
27+
net = model_factory[cfg.model_type](cfg.n_cats, aux_mode='pred')
28+
net.load_state_dict(torch.load(args.weight_path), strict=False)
29+
net.cuda()
2830
net.eval()
2931

3032

3133
# dummy_input = torch.randn(1, 3, *cfg.crop_size)
3234
dummy_input = torch.randn(1, 3, 1024, 2048).cuda()
3335

34-
trt_model = torch2trt(net, [dummy_input, ])
36+
trt_model = torch2trt(net, [dummy_input, ], fp16_mode=args.fp16, max_workspace=1 << 30)
37+
38+
with open(args.out_pth, 'wb') as fw:
39+
fw.write(trt_model.engine.serialize())

tools/export_libtorch.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import argparse
2+
import os.path as osp
3+
import sys
4+
sys.path.insert(0, '.')
5+
6+
import torch
7+
8+
from lib.models import model_factory
9+
from configs import set_cfg_from_file
10+
11+
torch.set_grad_enabled(False)
12+
13+
14+
parse = argparse.ArgumentParser()
15+
parse.add_argument('--config', dest='config', type=str,
16+
default='configs/bisenetv2.py',)
17+
parse.add_argument('--weight-path', dest='weight_pth', type=str,
18+
default='model_final.pth')
19+
parse.add_argument('--outpath', dest='out_pth', type=str,
20+
default='model.pt')
21+
args = parse.parse_args()
22+
23+
24+
cfg = set_cfg_from_file(args.config)
25+
if cfg.use_sync_bn: cfg.use_sync_bn = False
26+
27+
net = model_factory[cfg.model_type](cfg.n_cats, aux_mode='pred')
28+
net.load_state_dict(torch.load(args.weight_pth), strict=False)
29+
net.eval()
30+
31+
32+
# dummy_input = torch.randn(1, 3, *cfg.crop_size)
33+
dummy_input = torch.randn(1, 3, 1024, 2048)
34+
script_module = torch.jit.trace(net, dummy_input)
35+
# script_module.save(args.out_pth, _use_new_zipfile_serialization=False)
36+
script_module.save(args.out_pth)
37+

0 commit comments

Comments
 (0)