Skip to content

Commit a1aa4ca

Browse files
authored
Add files via upload
1 parent 36e7b34 commit a1aa4ca

File tree

2 files changed

+195
-0
lines changed

2 files changed

+195
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
3+
*
4+
* NVIDIA Corporation and its licensors retain all intellectual property
5+
* and proprietary rights in and to this software, related documentation
6+
* and any modifications thereto. Any use, reproduction, disclosure or
7+
* distribution of this software and related documentation without an express
8+
* license agreement from NVIDIA Corporation is strictly prohibited.
9+
*
10+
*/
11+
12+
#include <cuda.h>
13+
#include <cuda_runtime.h>
14+
#include <stdint.h>
15+
#include <stdio.h>
16+
#include <string.h>
17+
18+
/**
 * Device-side logistic function: 1 / (1 + e^(-x)).
 *
 * The exponential is evaluated with the __expf intrinsic, i.e. the fast,
 * reduced-accuracy variant rather than expf().
 */
inline __device__ float sigmoidGPU(const float& x)
{
    const float expOfNegated = __expf(-x);
    return 1.0f / (1.0f + expOfNegated);
}
19+
20+
/**
 * Element-wise activation pass over a YOLOv3 output tensor.
 *
 * Launch layout: 3-D. The global x/y thread indices select a cell of the
 * gridSize x gridSize grid and the global z index selects one of numBBoxes
 * boxes. Threads whose indices fall outside those extents return without
 * touching memory, so the grid may be rounded up freely.
 *
 * Addressing: the value for (cell, box, k) lives at
 *     cell + gridSize * gridSize * (box * (5 + numOutputClasses) + k)
 * in both `input` and `output`, with cell = y * gridSize + x.
 *
 * Per-box values, by k:
 *     k = 0, 1            -> sigmoidGPU
 *     k = 2, 3            -> __expf
 *     k = 4               -> sigmoidGPU
 *     k = 5 .. 5 + C - 1  -> sigmoidGPU   (C = numOutputClasses)
 * NOTE(review): going by the parameter names these are presumably box
 * centre (0,1), box size (2,3), objectness (4) and class scores (5+) —
 * confirm against the consumer of `output`.
 *
 * No shared memory is used and no synchronisation is performed.
 */
__global__ void gpuYoloLayerV3(const float* input, float* output, const uint gridSize, const uint numOutputClasses,
                               const uint numBBoxes)
{
    const uint col = blockIdx.x * blockDim.x + threadIdx.x;
    const uint row = blockIdx.y * blockDim.y + threadIdx.y;
    const uint box = blockIdx.z * blockDim.z + threadIdx.z;

    const bool inRange = (col < gridSize) && (row < gridSize) && (box < numBBoxes);
    if (!inRange)
    {
        return;
    }

    // All offsets are kept in unsigned 32-bit arithmetic, matching the
    // wrap-around behaviour of the expressions they replace.
    const uint planeSize = gridSize * gridSize;
    const uint cell = row * gridSize + col;
    const uint firstChannel = box * (5 + numOutputClasses);

    // The five leading per-box values: exp for k = 2 and 3, sigmoid otherwise.
    for (uint k = 0; k < 5; ++k)
    {
        const uint idx = cell + planeSize * (firstChannel + k);
        const float raw = input[idx];
        output[idx] = (k == 2 || k == 3) ? __expf(raw) : sigmoidGPU(raw);
    }

    // The remaining numOutputClasses values all go through the sigmoid.
    for (uint cls = 0; cls < numOutputClasses; ++cls)
    {
        const uint idx = cell + planeSize * (firstChannel + (5 + cls));
        output[idx] = sigmoidGPU(input[idx]);
    }
}
56+
57+
/**
 * Host-side launcher for gpuYoloLayerV3.
 *
 * Enqueues one kernel launch per batch item on `stream`. Batch item b reads
 * from `input` and writes to `output` at an offset of b * outputSize float
 * elements (both pointers are reinterpreted as float arrays). The function
 * does not synchronise the stream, so errors raised while a kernel is
 * executing are not reported here.
 *
 * @param input             device pointer to the source floats
 * @param output            device pointer to the destination floats
 * @param batchSize         number of batch items, i.e. number of launches
 * @param gridSize          extent of the square grid covered by x and y
 * @param numOutputClasses  forwarded to the kernel
 * @param numBBoxes         extent covered by the z launch dimension
 * @param outputSize        stride between batch items, in float elements
 * @param stream            stream the launches are enqueued on
 * @return cudaSuccess if every launch was accepted, otherwise the error
 *         reported by cudaGetLastError() after the first failing launch.
 *         Like any cudaGetLastError() call, this also surfaces (and clears)
 *         an error left pending by earlier runtime calls on this thread.
 */
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
                            const uint& numOutputClasses, const uint& numBBoxes,
                            uint64_t outputSize, cudaStream_t stream);

cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
                            const uint& numOutputClasses, const uint& numBBoxes,
                            uint64_t outputSize, cudaStream_t stream)
{
    // With a zero extent there is nothing for the kernel to write, and a
    // zero-sized grid would be rejected as an invalid launch configuration.
    // Still report any pending runtime error, as the launch path does.
    if (batchSize == 0 || gridSize == 0 || numBBoxes == 0)
    {
        return cudaGetLastError();
    }

    const dim3 threads_per_block(16, 16, 4);

    // Ceiling division written as quotient + (remainder != 0) so that it can
    // not overflow and does not add an idle block when the extent is an
    // exact multiple of the block dimension.
    const dim3 number_of_blocks(
        gridSize / threads_per_block.x + (gridSize % threads_per_block.x != 0 ? 1u : 0u),
        gridSize / threads_per_block.y + (gridSize % threads_per_block.y != 0 ? 1u : 0u),
        numBBoxes / threads_per_block.z + (numBBoxes % threads_per_block.z != 0 ? 1u : 0u));

    const float* const src = reinterpret_cast<const float*>(input);
    float* const dst = reinterpret_cast<float*>(output);

    for (unsigned int batch = 0; batch < batchSize; ++batch)
    {
        const uint64_t offset = static_cast<uint64_t>(batch) * outputSize;

        gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
            src + offset, dst + offset, gridSize, numOutputClasses, numBBoxes);

        // Stop at the first rejected launch instead of queuing the rest.
        const cudaError_t status = cudaGetLastError();
        if (status != cudaSuccess)
        {
            return status;
        }
    }
    return cudaSuccess;
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a
5+
* copy of this software and associated documentation files (the "Software"),
6+
* to deal in the Software without restriction, including without limitation
7+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
* and/or sell copies of the Software, and to permit persons to whom the
9+
* Software is furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included in
12+
* all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20+
* DEALINGS IN THE SOFTWARE.
21+
*/
22+
23+
#include "nvdsinfer_custom_impl.h"
24+
#include "nvdsinfer_context.h"
25+
#include "yoloPlugins.h"
26+
#include "yolo.h"
27+
28+
#include <algorithm>
29+
30+
#define USE_CUDA_ENGINE_GET_API 1
31+
32+
/**
 * Populates `networkInfo` from the inference-context init parameters.
 *
 * The YOLO variant is derived from the lower-cased config file path: the
 * first of "yolov2" / "yolov3" found anywhere in the path selects the
 * family, and a "-tiny" suffix on that token selects the tiny variant.
 *
 * @param networkInfo  receives networkType, configFilePath, wtsFilePath,
 *                     deviceType ("kDLA" when initParams->useDLA is set,
 *                     otherwise "kGPU") and inputBlobName ("data"). It is
 *                     only written once both paths are known to be
 *                     non-empty and the variant has been recognised.
 * @param initParams   source of the config path, weights path and DLA flag
 * @return true on success; false (after a message on std::cerr) when
 *         initParams is null, a path is empty, the variant cannot be
 *         derived from the path, or fileExists() rejects either path.
 */
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
    if (initParams == nullptr) {
        std::cerr << "Yolo init params are NOT specified." << std::endl;
        return false;
    }

    const std::string cfgPath = initParams->customNetworkConfigFilePath;
    const std::string wtsPath = initParams->modelFilePath;

    // Checked before the variant detection: an empty config path can never
    // contain a variant token, so testing it afterwards would always report
    // "type is not defined" instead of the real cause.
    if (cfgPath.empty() || wtsPath.empty()) {
        std::cerr << "Yolo config file or weights file is NOT specified."
                  << std::endl;
        return false;
    }

    // Case-insensitive matching: work on a lower-cased copy of the path.
    std::string yoloCfg = cfgPath;
    std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
        return static_cast<char>(std::tolower (c));});

    const auto contains = [&yoloCfg] (const char* token) {
        return yoloCfg.find(token) != std::string::npos;
    };

    std::string yoloType;
    if (contains("yolov2")) {
        yoloType = contains("yolov2-tiny") ? "yolov2-tiny" : "yolov2";
    } else if (contains("yolov3")) {
        yoloType = contains("yolov3-tiny") ? "yolov3-tiny" : "yolov3";
    } else {
        std::cerr << "Yolo type is not defined from config file name:"
                  << yoloCfg << std::endl;
        return false;
    }

    networkInfo.networkType = yoloType;
    networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
    networkInfo.wtsFilePath = initParams->modelFilePath;
    networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
    networkInfo.inputBlobName = "data";

    if (!fileExists(networkInfo.configFilePath) ||
        !fileExists(networkInfo.wtsFilePath)) {
        std::cerr << "Yolo config file or weights file is NOT exist."
                  << std::endl;
        return false;
    }

    return true;
}
78+
79+
#if !USE_CUDA_ENGINE_GET_API
80+
/**
 * Builds a Yolo model parser from the inference-context init parameters.
 *
 * Only compiled when USE_CUDA_ENGINE_GET_API is 0.
 *
 * @param initParams  forwarded to getYoloNetworkInfo()
 * @return a Yolo object allocated with new (the caller receives the raw
 *         pointer), or nullptr when getYoloNetworkInfo() fails.
 */
IModelParser* NvDsInferCreateModelParser(
    const NvDsInferContextInitParams* initParams)
{
    NetworkInfo yoloNetInfo;

    if (getYoloNetworkInfo(yoloNetInfo, initParams))
    {
        return new Yolo(yoloNetInfo);
    }

    return nullptr;
}
89+
#else
90+
extern "C"
91+
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
92+
const NvDsInferContextInitParams * const initParams,
93+
nvinfer1::DataType dataType,
94+
nvinfer1::ICudaEngine *& cudaEngine);
95+
96+
extern "C"
97+
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
98+
const NvDsInferContextInitParams * const initParams,
99+
nvinfer1::DataType dataType,
100+
nvinfer1::ICudaEngine *& cudaEngine)
101+
{
102+
NetworkInfo networkInfo;
103+
if (!getYoloNetworkInfo(networkInfo, initParams)) {
104+
return false;
105+
}
106+
107+
Yolo yolo(networkInfo);
108+
cudaEngine = yolo.createEngine (builder);
109+
if (cudaEngine == nullptr)
110+
{
111+
std::cerr << "Failed to build cuda engine on "
112+
<< networkInfo.configFilePath << std::endl;
113+
return false;
114+
}
115+
116+
return true;
117+
}
118+
#endif

0 commit comments

Comments
 (0)