From 75ae1e677cec0f303640f7fc188b3f9c039b702f Mon Sep 17 00:00:00 2001 From: voldemortX Date: Sat, 27 Nov 2021 16:49:16 +0800 Subject: [PATCH 1/4] torch.onnx --- profiling.py | 26 ++++----------------- to_onnx.py | 47 +++++++++++++++++++++++++++++++++++++ tools/onnx_utils.py | 56 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 21 deletions(-) create mode 100644 to_onnx.py create mode 100644 tools/onnx_utils.py diff --git a/profiling.py b/profiling.py index eb3d2cca..6a2f81d0 100644 --- a/profiling.py +++ b/profiling.py @@ -1,36 +1,20 @@ -import yaml import argparse +import torch +import yaml + from utils.all_utils_landec import build_lane_detection_model as build_lane_model from utils.all_utils_semseg import build_segmentation_model, load_checkpoint from tools.profiling_utils import init_lane, init_seg, speed_evaluate_real, speed_evaluate_simple, model_profile -import torch +from tools.onnx_utils import add_basic_arguments if __name__ == '__main__': # Settings parser = argparse.ArgumentParser(description='PyTorch Auto-drive') - parser.add_argument('--height', type=int, default=288, - help='Image input height (default: 288)') - parser.add_argument('--width', type=int, default=800, - help='Image input width (default: 800)') - parser.add_argument('--dataset', type=str, default='tusimple', - help='Profile on TuSimple (tusimple) / CULane (culane) (default: tusimple)') - parser.add_argument('--method', type=str, default='baseline', - help='method selection (lstr/scnn/resa/sad/baseline) (default: baseline)') - parser.add_argument('--backbone', type=str, default='erfnet', - help='backbone selection (erfnet/enet/vgg16/resnet18s/resnet18/resnet34/resnet50/resnet101)' - '(default: erfnet)') - parser.add_argument('--task', type=str, default='lane', - help='task selection (lane/seg)') + add_basic_arguments(parser) parser.add_argument('--mode', type=str, default='simple', help='Profiling mode (simple/real)') - parser.add_argument('--model', 
type=str, default='deeplabv3', - help='Model selection (fcn/erfnet/deeplabv2/deeplabv3/enet) (default: deeplabv3)') parser.add_argument('--times', type=int, default=1, help='Select test times') - parser.add_argument('--encoder-only', action='store_true', default=False, - help='Only train the encoder. ENet trains encoder and decoder separately (default: False)') - parser.add_argument('--continue-from', type=str, default=None, - help='Continue training from a previous checkpoint') args = parser.parse_args() lane_need_interpolate = ['baseline', 'scnn', 'sad', 'resa'] seg_need_interpolate = ['fcn', 'deeplabv2', 'deeplabv3'] diff --git a/to_onnx.py b/to_onnx.py new file mode 100644 index 00000000..5d44dea1 --- /dev/null +++ b/to_onnx.py @@ -0,0 +1,47 @@ +# Convert only the pt model part + +import argparse +import onnx +import torch +import yaml + +from utils.all_utils_landec import build_lane_detection_model as build_lane_model +from utils.all_utils_semseg import build_segmentation_model, load_checkpoint +from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion + + +if __name__ == '__main__': + # Settings + parser = argparse.ArgumentParser(description='PyTorch Auto-drive') + add_basic_arguments(parser) + args = parser.parse_args() + with open('configs.yaml', 'r') as f: # Safer and cleaner than box/EasyDict + configs = yaml.load(f, Loader=yaml.Loader) + input_sizes = (args.height, args.width) + if args.task == 'lane': + num_classes = configs[configs['LANE_DATASETS'][args.dataset]]['NUM_CLASSES'] + net = build_lane_model(args, num_classes) + elif args.task == 'seg': + num_classes = configs[configs['SEGMENTATION_DATASETS'][args.dataset]]['NUM_CLASSES'] + net, _, _, _ = build_segmentation_model(configs, args, num_classes, 0, input_sizes) + else: + raise ValueError('Task must be lane or seg! 
Not {}'.format(args.task)) + + device = torch.device('cpu') + if torch.cuda.is_available(): + device = torch.device('cuda:0') + print(device) + net.to(device) + if args.continue_from is not None: + load_checkpoint(net=net, optimizer=None, lr_scheduler=None, filename=args.continue_from) + else: + raise ValueError('Must provide a weight file by --continue-from') + torch.manual_seed(7) + dummy = torch.randn(1, 3, args.height, args.width, device=device, requires_grad=False) + + # Convert + onnx_filename = args.continue_from[:args.continue_from.rfind('.')] + '.onnx' + pt_to_onnx(net, dummy, onnx_filename) + + # Test + test_conversion(net, onnx_filename, dummy) diff --git a/tools/onnx_utils.py b/tools/onnx_utils.py new file mode 100644 index 00000000..6ee5bcad --- /dev/null +++ b/tools/onnx_utils.py @@ -0,0 +1,56 @@ +# Convert only the pt model part + +import onnx +import onnxruntime as ort +import numpy as np +import torch + + +def add_basic_arguments(p): + p.add_argument('--height', type=int, default=288, + help='Image input height (default: 288)') + p.add_argument('--width', type=int, default=800, + help='Image input width (default: 800)') + p.add_argument('--dataset', type=str, default='tusimple', + help='Profile on TuSimple (tusimple) / CULane (culane) (default: tusimple)') + p.add_argument('--method', type=str, default='baseline', + help='method selection (lstr/scnn/sad/baseline) (default: baseline)') + p.add_argument('--backbone', type=str, default='erfnet', + help='backbone selection (erfnet/enet/vgg16/resnet18s/resnet18/resnet34/resnet50/resnet101)' + '(default: erfnet)') + p.add_argument('--task', type=str, default='lane', + help='task selection (lane/seg)') + p.add_argument('--model', type=str, default='deeplabv3', + help='Model selection (fcn/erfnet/deeplabv2/deeplabv3/enet) (default: deeplabv3)') + p.add_argument('--encoder-only', action='store_true', default=False, + help='Only train the encoder. 
ENet trains encoder and decoder separately (default: False)') + p.add_argument('--continue-from', type=str, default=None, + help='Continue training from a previous checkpoint') + + +def pt_to_onnx(net, dummy, filename): + net.eval() + torch.onnx.export(net, dummy, filename, verbose=True, input_names=['input1'], output_names=['output1']) + + +@torch.no_grad() +def test_conversion(pt_net, onnx_filename, dummy): + pt_net.eval() + dummy = dummy.cpu() + pt_net = pt_net.cpu() + pt_out = pt_net(dummy) + onnx_net = onnx.load(onnx_filename) + onnx.checker.check_model(onnx_net) + onnx.helper.printable_graph(onnx_net.graph) + ort_session = ort.InferenceSession(onnx_filename) + onnx_out = ort_session.run(None, {'input1': dummy.numpy()}) + diff = 0.0 + avg = 0.0 + for (_, temp_pt), temp_onnx in zip(pt_out.items(), onnx_out): + diff += np.abs((temp_onnx - temp_pt.numpy())).mean() + avg += temp_pt.abs().mean().item() + diff /= len(onnx_out) + avg /= len(onnx_out) + diff_percentage = diff / avg * 100 + print('Average diff: {}\nAverage diff (%): {}'.format(diff, diff_percentage)) + assert diff_percentage < 0.1, 'Diff over 0.1%, please check for special operators!' 
From 4df92fde1233a5b1dae516ad2883594a0a464cf5 Mon Sep 17 00:00:00 2001 From: voldemortX Date: Sat, 27 Nov 2021 17:08:12 +0800 Subject: [PATCH 2/4] support resa and lstr --- to_onnx.py | 10 +++++++--- tools/onnx_utils.py | 12 ++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/to_onnx.py b/to_onnx.py index 5d44dea1..753e2d23 100644 --- a/to_onnx.py +++ b/to_onnx.py @@ -1,13 +1,12 @@ # Convert only the pt model part import argparse -import onnx import torch import yaml from utils.all_utils_landec import build_lane_detection_model as build_lane_model from utils.all_utils_semseg import build_segmentation_model, load_checkpoint -from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion +from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion, MINIMAL_OPSET_VERSIONS if __name__ == '__main__': @@ -41,7 +40,12 @@ # Convert onnx_filename = args.continue_from[:args.continue_from.rfind('.')] + '.onnx' - pt_to_onnx(net, dummy, onnx_filename) + op_v = 9 + if args.task == 'lane' and args.method in MINIMAL_OPSET_VERSIONS.keys(): + op_v = MINIMAL_OPSET_VERSIONS[args.method] + if args.task == 'seg' and args.model in MINIMAL_OPSET_VERSIONS.keys(): + op_v = MINIMAL_OPSET_VERSIONS[args.model] + pt_to_onnx(net, dummy, onnx_filename, opset_version=op_v) # Test test_conversion(net, onnx_filename, dummy) diff --git a/tools/onnx_utils.py b/tools/onnx_utils.py index 6ee5bcad..22090e59 100644 --- a/tools/onnx_utils.py +++ b/tools/onnx_utils.py @@ -6,6 +6,13 @@ import torch +MINIMAL_OPSET_VERSIONS = { + # Others use 9 + 'lstr': 11, + 'resa': 12 +} + + def add_basic_arguments(p): p.add_argument('--height', type=int, default=288, help='Image input height (default: 288)') @@ -28,9 +35,10 @@ def add_basic_arguments(p): help='Continue training from a previous checkpoint') -def pt_to_onnx(net, dummy, filename): +def pt_to_onnx(net, dummy, filename, opset_version=9): net.eval() - torch.onnx.export(net, dummy, filename, verbose=True, 
input_names=['input1'], output_names=['output1']) + torch.onnx.export(net, dummy, filename, verbose=True, input_names=['input1'], output_names=['output1'], + opset_version=opset_version) @torch.no_grad() From 9d63de5385343f178f06e713e3c8fcb286411003 Mon Sep 17 00:00:00 2001 From: voldemortX Date: Sat, 27 Nov 2021 21:11:57 +0800 Subject: [PATCH 3/4] SCNN support --- tools/onnx_utils.py | 3 +- torchvision_models/_utils.py | 9 ++++++ torchvision_models/common_models.py | 45 ++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/tools/onnx_utils.py b/tools/onnx_utils.py index 22090e59..988a648c 100644 --- a/tools/onnx_utils.py +++ b/tools/onnx_utils.py @@ -9,7 +9,8 @@ MINIMAL_OPSET_VERSIONS = { # Others use 9 'lstr': 11, - 'resa': 12 + 'resa': 12, + 'scnn': 11 } diff --git a/torchvision_models/_utils.py b/torchvision_models/_utils.py index 4b337a53..ad746617 100644 --- a/torchvision_models/_utils.py +++ b/torchvision_models/_utils.py @@ -4,6 +4,15 @@ from torch import nn +def is_tracing() -> bool: + # https://github.com/pytorch/pytorch/issues/42448 + trace = torch.jit.is_tracing() + if isinstance(trace, bool): + return trace + else: + return torch._C._is_tracing() + + class IntermediateLayerGetter(nn.ModuleDict): """ Module wrapper that returns intermediate layers from a model diff --git a/torchvision_models/common_models.py b/torchvision_models/common_models.py index 762a8965..77456169 100644 --- a/torchvision_models/common_models.py +++ b/torchvision_models/common_models.py @@ -4,6 +4,8 @@ import torch.nn as nn from torch.nn import functional as F +from ._utils import is_tracing + class non_bottleneck_1d(nn.Module): def __init__(self, chann, dropprob, dilated): @@ -164,20 +166,35 @@ def _adjust_initializations(self, num_channels=128): def forward(self, input): output = input - # First one remains unchanged (according to the original paper), why not add a relu afterwards? 
- # Update and send to next - # Down - for i in range(1, output.shape[2]): - output[:, :, i:i + 1, :].add_(F.relu(self.conv_d(output[:, :, i - 1:i, :]))) - # Up - for i in range(output.shape[2] - 2, 0, -1): - output[:, :, i:i + 1, :].add_(F.relu(self.conv_u(output[:, :, i + 1:i + 2, :]))) - # Right - for i in range(1, output.shape[3]): - output[:, :, :, i:i + 1].add_(F.relu(self.conv_r(output[:, :, :, i - 1:i]))) - # Left - for i in range(output.shape[3] - 2, 0, -1): - output[:, :, :, i:i + 1].add_(F.relu(self.conv_l(output[:, :, :, i + 1:i + 2]))) + if is_tracing(): + # PyTorch index+add_ will be ignored in traced graph + # Down + for i in range(1, output.shape[2]): + output[:, :, i:i + 1, :] = output[:, :, i:i + 1, :].add(F.relu(self.conv_d(output[:, :, i - 1:i, :]))) + # Up + for i in range(output.shape[2] - 2, 0, -1): + output[:, :, i:i + 1, :] = output[:, :, i:i + 1, :].add(F.relu(self.conv_u(output[:, :, i + 1:i + 2, :]))) + # Right + for i in range(1, output.shape[3]): + output[:, :, :, i:i + 1] = output[:, :, :, i:i + 1].add(F.relu(self.conv_r(output[:, :, :, i - 1:i]))) + # Left + for i in range(output.shape[3] - 2, 0, -1): + output[:, :, :, i:i + 1] = output[:, :, :, i:i + 1].add(F.relu(self.conv_l(output[:, :, :, i + 1:i + 2]))) + else: + # First one remains unchanged (according to the original paper), why not add a relu afterwards? 
+ # Update and send to next + # Down + for i in range(1, output.shape[2]): + output[:, :, i:i + 1, :].add_(F.relu(self.conv_d(output[:, :, i - 1:i, :]))) + # Up + for i in range(output.shape[2] - 2, 0, -1): + output[:, :, i:i + 1, :].add_(F.relu(self.conv_u(output[:, :, i + 1:i + 2, :]))) + # Right + for i in range(1, output.shape[3]): + output[:, :, :, i:i + 1].add_(F.relu(self.conv_r(output[:, :, :, i - 1:i]))) + # Left + for i in range(output.shape[3] - 2, 0, -1): + output[:, :, :, i:i + 1].add_(F.relu(self.conv_l(output[:, :, :, i + 1:i + 2]))) return output From ad2d777bef6e19c0fb66734588127954a826beb0 Mon Sep 17 00:00:00 2001 From: cedricgsh Date: Sun, 28 Nov 2021 15:54:48 +0800 Subject: [PATCH 4/4] add onnxruntime draft --- to_onnx.py | 53 ++++++++++++++++++++++++++++++++++++--- tools/onnx_utils.py | 39 +++++++++++++++++++++++++++- utils/all_utils_landec.py | 26 +++++++++++++++---- 3 files changed, 108 insertions(+), 10 deletions(-) diff --git a/to_onnx.py b/to_onnx.py index 753e2d23..880b6388 100644 --- a/to_onnx.py +++ b/to_onnx.py @@ -3,10 +3,12 @@ import argparse import torch import yaml +import fcntl from utils.all_utils_landec import build_lane_detection_model as build_lane_model +from utils.all_utils_landec import init, test_one_set, fast_evaluate from utils.all_utils_semseg import build_segmentation_model, load_checkpoint -from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion, MINIMAL_OPSET_VERSIONS +from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion, MINIMAL_OPSET_VERSIONS, get_ort_session if __name__ == '__main__': @@ -36,8 +38,14 @@ else: raise ValueError('Must provide a weight file by --continue-from') torch.manual_seed(7) - dummy = torch.randn(1, 3, args.height, args.width, device=device, requires_grad=False) + mean = configs['GENERAL']['MEAN'] + std = configs['GENERAL']['STD'] + if args.dataset not in configs['LANE_DATASETS'].keys(): + raise ValueError + # temp variable for inference + 
real_height, real_width = input_sizes[0] + dummy = torch.randn(1, 3, real_height, real_width, device=device, requires_grad=False) # Convert onnx_filename = args.continue_from[:args.continue_from.rfind('.')] + '.onnx' op_v = 9 @@ -45,7 +53,44 @@ op_v = MINIMAL_OPSET_VERSIONS[args.method] if args.task == 'seg' and args.model in MINIMAL_OPSET_VERSIONS.keys(): op_v = MINIMAL_OPSET_VERSIONS[args.model] + # TODO: directly load xxx.onnx without converting pt_to_onnx(net, dummy, onnx_filename, opset_version=op_v) - # Test - test_conversion(net, onnx_filename, dummy) + if args.verify == 'no': + print("The model has been converted.") + elif args.verify == 'simple': + test_conversion(net, onnx_filename, dummy) + elif args.verify == 'real': + num_classes = configs[configs['LANE_DATASETS'][args.dataset]]['NUM_CLASSES'] + input_sizes = configs[configs['LANE_DATASETS'][args.dataset]]['SIZES'] + gap = configs[configs['LANE_DATASETS'][args.dataset]]['GAP'] + ppl = configs[configs['LANE_DATASETS'][args.dataset]]['PPL'] + thresh = configs[configs['LANE_DATASETS'][args.dataset]]['THRESHOLD'] + weights = configs[configs['LANE_DATASETS'][args.dataset]]['WEIGHTS'] + base = configs[configs['LANE_DATASETS'][args.dataset]]['BASE_DIR'] + max_lane = configs[configs['LANE_DATASETS'][args.dataset]]['MAX_LANE'] + ort_net = get_ort_session(onnx_filename) + + # onnx inference + if args.state == 1 or args.state == 2 or args.state == 3: + data_loader = init(batch_size=args.batch_size, state=args.state, dataset=args.dataset, + input_sizes=input_sizes, mean=mean, std=std, base=base, workers=args.workers, + method=args.method) + load_checkpoint(net=net, optimizer=None, lr_scheduler=None, filename=args.continue_from) + if args.state == 1: # Validate with mean IoU + _, x = fast_evaluate(loader=data_loader, device=device, net=ort_net, + num_classes=num_classes, output_size=input_sizes[0], + is_mixed_precision=args.mixed_precision) + with open('log.txt', 'a') as f: + # Safe writing with locks + 
fcntl.flock(f, fcntl.LOCK_EX) + f.write(args.exp_name + ' validation: ' + str(x) + '\n') + fcntl.flock(f, fcntl.LOCK_UN) + else: # Test with official scripts later (so just predict lanes here) + test_one_set(net=ort_net, device=device, loader=data_loader, is_mixed_precision=args.mixed_precision, + gap=gap, input_sizes=input_sizes, ppl=ppl, thresh=thresh, dataset=args.dataset, + method=args.method, max_lane=max_lane, exp_name=args.exp_name, deploy='onnx') + else: + raise ValueError + +# python to_onnx.py --state=2 --continue-from=vgg16_baseline_tusimple_20210223.pt --dataset=tusimple --method=baseline --backbone=vgg16 --batch-size=1 --mixed-precision --task=lane --exp-name=none_onnx_test --verify=real \ No newline at end of file diff --git a/tools/onnx_utils.py b/tools/onnx_utils.py index 988a648c..5f5f37cb 100644 --- a/tools/onnx_utils.py +++ b/tools/onnx_utils.py @@ -5,7 +5,6 @@ import numpy as np import torch - MINIMAL_OPSET_VERSIONS = { # Others use 9 'lstr': 11, @@ -34,6 +33,19 @@ def add_basic_arguments(p): help='Only train the encoder. ENet trains encoder and decoder separately (default: False)') p.add_argument('--continue-from', type=str, default=None, help='Continue training from a previous checkpoint') + p.add_argument('--batch-size', type=int, default=8, + help='input batch size. Recommend 4 times the training batch size in testing (default: 8)') + p.add_argument('--mixed-precision', action='store_true', default=False, + help='Enable mixed precision training (default: False)') + p.add_argument('--state', type=int, default=0, + help='Conduct validation(3)/final test(2)/fast validation(1)/normal training(0) (default: 0)') + p.add_argument('--workers', type=int, default=10, + help='Number of workers (threads) when loading data.' 
+ 'Recommend value for training: batch_size / 2 (default: 10)') + p.add_argument('--exp-name', type=str, default='', + help='Name of experiment') + p.add_argument('--verify', type=str, default='real', + help='no: without verification/real: process the whole dataset/simple: process a random tensor') def pt_to_onnx(net, dummy, filename, opset_version=9): @@ -63,3 +75,28 @@ def test_conversion(pt_net, onnx_filename, dummy): diff_percentage = diff / avg * 100 print('Average diff: {}\nAverage diff (%): {}'.format(diff, diff_percentage)) assert diff_percentage < 0.1, 'Diff over 0.1%, please check for special operators!' + + +def get_ort_session(onnx_filename): + # return onnx runtime session + print(ort.get_device()) + # providers = [ + # ('CUDAExecutionProvider', { + # 'device_id': 0, + # 'arena_extend_strategy': 'kNextPowerOfTwo', + # 'gpu_mem_limit': 2 * 1024 * 1024 * 1024, + # 'cudnn_conv_algo_search': 'EXHAUSTIVE', + # 'do_copy_in_default_stream': True, + # }), + # ] + onnx_net = onnx.load(onnx_filename) + onnx.checker.check_model(onnx_net) + onnx.helper.printable_graph(onnx_net.graph) + ort_session = ort.InferenceSession(onnx_filename) + + return ort_session + + +def to_numpy(tensor): + # transfer tensor to numpy + return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() diff --git a/utils/all_utils_landec.py b/utils/all_utils_landec.py index de57aae1..bb2758dc 100644 --- a/utils/all_utils_landec.py +++ b/utils/all_utils_landec.py @@ -5,6 +5,7 @@ import ujson as json import numpy as np from tqdm import tqdm +from collections import OrderedDict if torch.__version__ >= '1.6.0': from torch.cuda.amp import autocast, GradScaler else: @@ -359,11 +360,24 @@ def fast_evaluate(net, device, loader, is_mixed_precision, output_size, num_clas # A unified inference function, for segmentation-based lane detection methods @torch.no_grad() -def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, dataset, max_lane=0, 
forward=True): +def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, dataset, max_lane=0, forward=True, + deploy='pt'): # Assume net and images are on the same device # images: B x C x H x W # Return: a list of lane predictions on each image - outputs = net(inputs) if forward else inputs # Support no forwarding inside this function + # deploy: pt(pytorch)/onnx(onnx runtime)/trt(tensorrt) + if deploy == 'pt': + outputs = net(inputs) if forward else inputs # Support no forwarding inside this function + elif deploy == 'onnx': + + onnx_inputs = inputs.detach().cpu().numpy() if inputs.requires_grad else inputs.cpu().numpy() + onnx_out = net.run(None, {'input1': onnx_inputs}) + outputs = OrderedDict() + outputs['out'] = torch.from_numpy(onnx_out[0]).to(inputs.device) + outputs['lane'] = torch.from_numpy(onnx_out[1]).to(inputs.device) + elif deploy == 'trt': + # TODO: support the tensorrt + pass prob_map = torch.nn.functional.interpolate(outputs['out'], size=input_sizes[0], mode='bilinear', align_corners=True).softmax(dim=1) existence_conf = outputs['lane'].sigmoid() @@ -386,19 +400,21 @@ def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, d # Adapted from harryhan618/SCNN_Pytorch @torch.no_grad() def test_one_set(net, device, loader, is_mixed_precision, input_sizes, gap, ppl, thresh, dataset, - method='baseline', max_lane=0, exp_name=None): + method='baseline', max_lane=0, exp_name=None, deploy='pt'): # Predict on 1 data_loader and save predictions for the official script # sizes: [input size, test original size, ...]
# max_lane = 0 -> unlimited number of lanes all_lanes = [] - net.eval() + # onnx runtime does not have .eval() + if deploy == 'pt': + net.eval() for images, filenames in tqdm(loader): images = images.to(device) with autocast(is_mixed_precision): if method in ['baseline', 'scnn', 'resa']: batch_coordinates = lane_as_segmentation_inference(net, images, input_sizes, gap, ppl, thresh, dataset, - max_lane) + max_lane, deploy=deploy) else: batch_coordinates = net.inference(images, input_sizes, gap, ppl, dataset, max_lane)