
Commit 45232ba

Replace C++ API with Python API for TensorRT
1 parent 4895eff commit 45232ba

File tree

6 files changed: +154 -395 lines changed


PyTorch-ONNX-TensorRT/CMakeLists.txt

Lines changed: 0 additions & 16 deletions
This file was deleted.

PyTorch-ONNX-TensorRT/FindTensorRT.cmake

Lines changed: 0 additions & 87 deletions
This file was deleted.

PyTorch-ONNX-TensorRT/README.md

Lines changed: 10 additions & 9 deletions
@@ -1,20 +1,21 @@
 # How to convert a model from PyTorch to TensorRT and speed up inference
 The blog post is here: https://www.learnopencv.com/how-to-convert-a-model-from-pytorch-to-tensorrt-and-speed-up-inference/
 
-To run Python part:
+To run the PyTorch part:
 ```shell script
 python3 -m pip install -r requirements.txt
 python3 pytorch_model.py
 ```
 
-To run C++ part:
-```shell script
-mkdir build
-cd build
-cmake -DOpenCV_DIR=[path-to-opencv-build] -DTensorRT_DIR=[path-to-tensorrt] ..
-make -j8
-trt_sample[.exe] resnet50.onnx turkish_coffee.jpg
-```
+To run the TensorRT part:
+1. Download and install NVIDIA CUDA 10.0 or later by following the official instructions: [link](https://developer.nvidia.com/cuda-10.0-download-archive)
+2. Download and extract the cuDNN library for your CUDA version (login required): [link](https://developer.nvidia.com/rdp/cudnn-download)
+3. Download and extract the NVIDIA TensorRT library for your CUDA version (login required):
+   [link](https://developer.nvidia.com/nvidia-tensorrt-6x-download).
+   The minimum required version is 6.0.1.5.
+   Please follow the [Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) for your system and don't forget to install the Python bindings.
+4. Add the absolute paths to the CUDA, TensorRT, and cuDNN libraries to the ```PATH``` or ```LD_LIBRARY_PATH``` environment variable.
+5. Install [PyCUDA](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-pycuda)
 
 # AI Courses by OpenCV
 
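The steps above only cover the environment setup. As a quick way to confirm that the CUDA runtime, the TensorRT Python bindings, and PyCUDA are all importable before running the scripts below, a minimal sanity check could look like the sketch here (a suggestion of mine, not one of the changed files; it only assumes the packages named in steps 3-5):

```python
# check_setup.py -- hypothetical helper, not part of this commit
import pycuda.autoinit            # creates a CUDA context on the default GPU
import pycuda.driver as cuda      # PyCUDA (step 5)
import tensorrt as trt            # TensorRT Python bindings (step 3)

print("TensorRT version:", trt.__version__)
print("CUDA device:", pycuda.autoinit.device.name())
free, total = cuda.mem_get_info()
print("GPU memory: {} MB free of {} MB".format(free // 2**20, total // 2**20))
```

If any of these imports fail, revisit the corresponding installation step before moving on to the TensorRT script.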

PyTorch-ONNX-TensorRT/pytorch_model.py

Lines changed: 68 additions & 64 deletions
@@ -1,75 +1,79 @@
 import cv2
 import onnx
 import torch
-from albumentations import (
-    Compose,
-    Resize,
-)
+from albumentations import (Compose, Resize)
 from albumentations.augmentations.transforms import Normalize
 from albumentations.pytorch.transforms import ToTensor
 from torchvision import models
 
-# load pre-trained model ------------------------------------------------------
-model = models.resnet50(pretrained=True)
 
-# preprocessing stage ---------------------------------------------------------
-# transformations for the input data
-transforms = Compose(
-    [
+def preprocess_image(img_path):
+    # transformations for the input data
+    transforms = Compose([
         Resize(224, 224, interpolation=cv2.INTER_NEAREST),
         Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         ToTensor(),
-    ],
-)
-
-# read input image
-input_img = cv2.imread("turkish_coffee.jpg")
-# do transformations
-input_data = transforms(image=input_img)["image"]
-# prepare batch
-batch_data = torch.unsqueeze(input_data, 0).cuda()
-
-# inference stage -------------------------------------------------------------
-model.eval()
-model.cuda()
-output_data = model(batch_data)
-
-# post-processing stage -------------------------------------------------------
-# get class names
-with open("imagenet_classes.txt") as f:
-    classes = [line.strip() for line in f.readlines()]
-# calculate human-readable value by softmax
-confidences = torch.nn.functional.softmax(output_data, dim=1)[0] * 100
-# find top predicted classes
-_, indices = torch.sort(output_data, descending=True)
-i = 0
-# print the top classes predicted by the model
-while confidences[indices[0][i]] > 0.5:
-    class_idx = indices[0][i]
-    print(
-        "class:",
-        classes[class_idx],
-        ", confidence:",
-        confidences[class_idx].item(),
-        "%, index:",
-        class_idx.item(),
-    )
-    i += 1
-
-# convert to ONNX -------------------------------------------------------------
-onnx_filename = "resnet50.onnx"
-torch.onnx.export(
-    model,
-    batch_data,
-    onnx_filename,
-    input_names=["input"],
-    output_names=["output"],
-    export_params=True,
-)
-
-onnx_model = onnx.load(onnx_filename)
-# check that the model converted fine
-onnx.checker.check_model(onnx_model)
-
-print("Model was successfully converted to ONNX format.")
-print("It was saved to", onnx_filename)
+    ])
+
+    # read input image
+    input_img = cv2.imread(img_path)
+    # do transformations
+    input_data = transforms(image=input_img)["image"]
+    # prepare batch
+    batch_data = torch.unsqueeze(input_data, 0)
+
+    return batch_data
+
+
+def postprocess(output_data):
+    # get class names
+    with open("imagenet_classes.txt") as f:
+        classes = [line.strip() for line in f.readlines()]
+    # calculate human-readable value by softmax
+    confidences = torch.nn.functional.softmax(output_data, dim=1)[0] * 100
+    # find top predicted classes
+    _, indices = torch.sort(output_data, descending=True)
+    i = 0
+    # print the top classes predicted by the model
+    while confidences[indices[0][i]] > 0.5:
+        class_idx = indices[0][i]
+        print(
+            "class:",
+            classes[class_idx],
+            ", confidence:",
+            confidences[class_idx].item(),
+            "%, index:",
+            class_idx.item(),
+        )
+        i += 1
+
+
+def main():
+    # load pre-trained model -------------------------------------------------------------------------------------------
+    model = models.resnet50(pretrained=True)
+
+    # preprocessing stage ----------------------------------------------------------------------------------------------
+    input = preprocess_image("turkish_coffee.jpg").cuda()
+
+    # inference stage --------------------------------------------------------------------------------------------------
+    model.eval()
+    model.cuda()
+    output = model(input)
+
+    # post-processing stage --------------------------------------------------------------------------------------------
+    postprocess(output)
+
+    # convert to ONNX --------------------------------------------------------------------------------------------------
+    ONNX_FILE_PATH = "resnet50.onnx"
+    torch.onnx.export(model, input, ONNX_FILE_PATH, input_names=["input"], output_names=["output"], export_params=True)
+
+    onnx_model = onnx.load(ONNX_FILE_PATH)
+    # check that the model converted fine
+    onnx.checker.check_model(onnx_model)
+
+    print("Model was successfully converted to ONNX format.")
+    print("It was saved to", ONNX_FILE_PATH)
+
+
+if __name__ == '__main__':
+    main()
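With preprocess_image and postprocess factored out, the pure PyTorch pipeline can also be timed on its own, which gives the baseline that the TensorRT engine below is meant to beat. The following is only an illustrative sketch (the file name benchmark_pytorch.py, the warm-up count, and the iteration count are my own choices, not part of the commit); it assumes a CUDA-capable GPU and the same turkish_coffee.jpg input:

```python
# benchmark_pytorch.py -- hypothetical timing sketch, not part of this commit
import time

import torch
from torchvision import models

from pytorch_model import preprocess_image, postprocess

model = models.resnet50(pretrained=True).eval().cuda()
batch = preprocess_image("turkish_coffee.jpg").cuda()

with torch.no_grad():
    # warm up so lazy CUDA initialization is not included in the measurement
    for _ in range(10):
        model(batch)
    torch.cuda.synchronize()

    start = time.time()
    for _ in range(100):
        output = model(batch)
    torch.cuda.synchronize()
    elapsed_ms = (time.time() - start) / 100 * 1000

postprocess(output)
print("average PyTorch inference time: {:.2f} ms".format(elapsed_ms))
```

The same measurement pattern (warm-up, synchronize, average over many runs) can be applied to the TensorRT script below to compare the two back ends.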
Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+from pytorch_model import preprocess_image, postprocess
+import torch
+import pycuda.driver as cuda
+import pycuda.autoinit
+import numpy as np
+import tensorrt as trt
+
+
+ONNX_FILE_PATH = "resnet50.onnx"
+# logger to capture errors, warnings, and other information during the build and inference phases
+TRT_LOGGER = trt.Logger()
+
+
+def build_engine(onnx_file_path):
+    # initialize TensorRT engine and parse ONNX model
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network()
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+
+    # allow TensorRT to use up to 1GB of GPU memory for tactic selection
+    builder.max_workspace_size = 1 << 30
+    # we have only one image in batch
+    builder.max_batch_size = 1
+    # use FP16 mode if possible
+    if builder.platform_has_fast_fp16:
+        builder.fp16_mode = True
+
+    # parse ONNX
+    with open(onnx_file_path, 'rb') as model:
+        print('Beginning ONNX file parsing')
+        parser.parse(model.read())
+    print('Completed parsing of ONNX file')
+
+    # generate TensorRT engine optimized for the target platform
+    print('Building an engine...')
+    engine = builder.build_cuda_engine(network)
+    context = engine.create_execution_context()
+    print("Completed creating Engine")
+    return engine, context
+
+
+def main():
+    # initialize TensorRT engine and parse ONNX model
+    engine, context = build_engine(ONNX_FILE_PATH)
+    # get sizes of input and output and allocate memory required for input data and for output data
+    for binding in engine:
+        if engine.binding_is_input(binding):  # we expect only one input
+            input_shape = engine.get_binding_shape(binding)
+            input_size = trt.volume(input_shape) * engine.max_batch_size * np.dtype(np.float32).itemsize  # in bytes
+            device_input = cuda.mem_alloc(input_size)
+        else:  # and one output
+            output_shape = engine.get_binding_shape(binding)
+            # create page-locked memory buffers (i.e. won't be swapped to disk)
+            host_output = cuda.pagelocked_empty(trt.volume(output_shape) * engine.max_batch_size, dtype=np.float32)
+            device_output = cuda.mem_alloc(host_output.nbytes)
+
+    # Create a stream in which to copy inputs/outputs and run inference.
+    stream = cuda.Stream()
+
+
+    # preprocess input data
+    host_input = np.array(preprocess_image("turkish_coffee.jpg").numpy(), dtype=np.float32, order='C')
+    cuda.memcpy_htod_async(device_input, host_input, stream)
+
+    # run inference
+    context.execute_async(bindings=[int(device_input), int(device_output)], stream_handle=stream.handle)
+    cuda.memcpy_dtoh_async(host_output, device_output, stream)
+    stream.synchronize()
+
+    # postprocess results
+    output_data = torch.Tensor(host_output).reshape(engine.max_batch_size, output_shape[0])
+    postprocess(output_data)
+
+
+if __name__ == '__main__':
+    main()
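Because build_cuda_engine parses the ONNX graph and searches for optimized kernels on every run, it is common to build the engine once and cache the serialized plan. A possible extension, sketched here under the assumption of the same TensorRT 6/7-era Python API used above (the file name engine_cache.py and the engine path are illustrative, not part of the commit):

```python
# engine_cache.py -- hypothetical sketch, not part of this commit
import tensorrt as trt

TRT_LOGGER = trt.Logger()
ENGINE_PATH = "resnet50.engine"  # illustrative cache location


def save_engine(engine, path=ENGINE_PATH):
    # serialize the built engine to a plan file so later runs can skip the build step
    with open(path, "wb") as f:
        f.write(engine.serialize())


def load_engine(path=ENGINE_PATH):
    # deserialize a previously saved plan back into an ICudaEngine
    runtime = trt.Runtime(TRT_LOGGER)
    with open(path, "rb") as f:
        return runtime.deserialize_cuda_engine(f.read())
```

On later runs, main() could call load_engine() and engine.create_execution_context() instead of rebuilding from resnet50.onnx. Note that a serialized plan is specific to the GPU and TensorRT version it was built with.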
