TosylYshu
diff --git a/real-esrgan/CMakeLists.txt b/real-esrgan/CMakeLists.txt
Lines changed: 4 additions & 2 deletions
diff --git a/real-esrgan/common.hpp b/real-esrgan/common.hpp
Lines changed: 6 additions & 16 deletions
diff --git a/real-esrgan/gen_wts.py b/real-esrgan/gen_wts.py
Lines changed: 2 additions & 14 deletions
diff --git a/real-esrgan/postprocess.cu b/real-esrgan/postprocess.cu
Lines changed: 24 additions & 44 deletions
@@ -20,8 +20,8 @@ include_directories(${PROJECT_SOURCE_DIR}/include)
 include_directories(/usr/local/cuda/include)
 link_directories(/usr/local/cuda/lib64)
 # tensorrt
-include_directories(/usr/local/TensorRT-10.10.0.31/include)
-link_directories(/usr/local/TensorRT-10.10.0.31/lib)
+include_directories(/usr/include/x86_64-linux-gnu/)
+link_directories(/usr/lib/x86_64-linux-gnu/)
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -g -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
 cuda_add_library(myplugins SHARED preprocess.cu postprocess.cu)
@@ -40,3 +40,5 @@ target_link_libraries(real-esrgan ${OpenCV_LIBS})
 if(UNIX)
 add_definitions(-O2 -pthread)
 endif(UNIX)
+
+
@@ -10,8 +10,6 @@
 
 using namespace nvinfer1;
 
-static const int PRECISION_MODE = 16;  // fp32 : 32, fp16 : 16
-
 // TensorRT weight files have a simple space delimited format:
 // [type] [size] <data x size in hex>
 std::map<std::string, Weights> loadWeights(const std::string file) {
@@ -34,22 +32,14 @@ std::map<std::string, Weights> loadWeights(const std::string file) {
         // Read name and type of blob
         std::string name;
         input >> name >> std::dec >> size;
+        wt.type = DataType::kFLOAT;
 
-        if (PRECISION_MODE == 16) {
-            wt.type = DataType::kHALF;
-            uint16_t* val = reinterpret_cast<uint16_t*>(malloc(sizeof(val) * size));
-            for (uint32_t x = 0, y = size; x < y; ++x) {
-                input >> std::hex >> val[x];
-            }
-            wt.values = val;
-        } else {
-            wt.type = DataType::kFLOAT;
-            uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
-            for (uint32_t x = 0, y = size; x < y; ++x) {
-                input >> std::hex >> val[x];
-            }
-            wt.values = val;
+        // Load blob
+        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
+        for (uint32_t x = 0, y = size; x < y; ++x) {
+            input >> std::hex >> val[x];
         }
+        wt.values = val;
 
         wt.count = size;
         weightMap[name] = wt;
 
@@ -1,23 +1,15 @@
 import argparse
 import os
 import struct
-import numpy as np
 from basicsr.archs.rrdbnet_arch import RRDBNet
 from realesrgan import RealESRGANer
 from realesrgan.archs.srvgg_arch import SRVGGNetCompact
 
-
-def float32_to_float16_hex(value):
-    f16 = np.float16(value)
-    u16 = np.frombuffer(f16.tobytes(), dtype=np.uint16)[0]
-    return format(u16, "04x")
-
-
 def main():
     """Inference demo for Real-ESRGAN.
     """
     parser = argparse.ArgumentParser()
-    # parser.add_argument('-i', '--input', type=str, default='../TestData3', help='Input image or folder')
+    #parser.add_argument('-i', '--input', type=str, default='../TestData3', help='Input image or folder')
     parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
     parser.add_argument(
         '-n',
@@ -92,13 +84,9 @@ def main():
             f.write("{} {}".format(k, len(vr)))
             for vv in vr:
                 f.write(" ")
-                if args.fp32:
-                    f.write(struct.pack(">f", float(vv)).hex())
-                else:
-                    f.write(float32_to_float16_hex(float(vv)))
+                f.write(struct.pack(">f", float(vv)).hex())
             f.write("\n")
         print('Completed real-esrgan.wts file!')
 
-
 if __name__ == '__main__':
     main()
@@ -1,15 +1,14 @@
-#include "cublas_v2.h"
 #include "cuda_utils.h"
 
 using namespace std;
 
 // postprocess (NCHW->NHWC, RGB->BGR, *255, ROUND, uint8)
-template <typename T>
-__global__ void postprocess_kernel(uint8_t* output, const T* input, const int batchSize, const int height,
-                                   const int width, const int channel, const int thread_count) {
+// One thread per output byte: `index` addresses the interleaved (b, h, w, c) output and is
+// decomposed into c_idx / w_idx / h_idx / b_idx to locate the matching planar input element.
+// The source plane is (2 - c_idx), i.e. the channel order is reversed; this assumes
+// channel == 3 -- TODO confirm against the caller.
+// The value is scaled by 255, clamped to [0, 255] and then truncated (not rounded) by the
+// cast to uint8_t. Both clamp bounds are required: converting an out-of-range float
+// (which includes every negative value) to an unsigned integer type is undefined behaviour,
+// so the lower bound must not be dropped.
+__global__ void postprocess_kernel(uint8_t* output, float* input,
+    const int batchSize, const int height, const int width, const int channel,
+    const int thread_count)
+{
     int index = threadIdx.x + blockIdx.x * blockDim.x;
-    if (index >= thread_count)
-        return;
+    if (index >= thread_count) return;
 
     const int c_idx = index % channel;
     int idx = index / channel;
@@ -18,57 +17,38 @@ __global__ void postprocess_kernel(uint8_t* output, const T* input, const int ba
     const int h_idx = idx % height;
     const int b_idx = idx / height;
 
-    int g_idx = b_idx * height * width * channel + (2 - c_idx) * height * width + h_idx * width + w_idx;
-    float val = (float)input[g_idx];
-    float tt = val * 255.f;
+    int g_idx = b_idx * height * width * channel + (2 - c_idx)* height * width + h_idx * width + w_idx;
+    float tt = input[g_idx] * 255.f;
     if (tt > 255)
         tt = 255;
     if (tt < 0)
         tt = 0;
     output[index] = (uint8_t)tt;
 }
 
-template __global__ void postprocess_kernel<float>(uint8_t* output, const float* input, const int batchSize,
-                                                   const int height, const int width, const int channel,
-                                                   const int thread_count);
-template __global__ void postprocess_kernel<half>(uint8_t* output, const half* input, const int batchSize,
-                                                  const int height, const int width, const int channel,
-                                                  const int thread_count);
-
-template <typename T>
-void postprocess(uint8_t* output, const T* input, int batchSize, int height, int width, int channel,
-                 cudaStream_t stream) {
 // Host-side launcher for postprocess_kernel. Enqueues the kernel on `stream` with one
 // thread per output element (batchSize * height * width * channel), 512 threads per block
 // and no dynamic shared memory. The call does not synchronize the stream.
 // NOTE(review): the launch result is not checked here (no cudaGetLastError()).
+void postprocess(uint8_t* output, float*input, int batchSize, int height, int width, int channel, cudaStream_t stream)
+{
     int thread_count = batchSize * height * width * channel;
     int block = 512;
     int grid = (thread_count - 1) / block + 1;  // rounds up so a partial final block is still launched
 
-    postprocess_kernel<T><<<grid, block, 0, stream>>>(output, input, batchSize, height, width, channel, thread_count);
+    postprocess_kernel << <grid, block, 0, stream >> > (output, input, batchSize, height, width, channel, thread_count);
 }
 
-template void postprocess<float>(uint8_t* output, const float* input, int batchSize, int height, int width, int channel,
-                                 cudaStream_t stream);
-template void postprocess<half>(uint8_t* output, const half* input, int batchSize, int height, int width, int channel,
-                                cudaStream_t stream);
 
 #include "postprocess.hpp"
 
-namespace nvinfer1 {
-int PostprocessPluginV2::enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
-                                 cudaStream_t stream) noexcept {
-    uint8_t* output = (uint8_t*)outputs[0];
+namespace nvinfer1
+{
     // Runs the postprocess step for one enqueue call: inputs[0] is read as float data and
     // the uint8 result is written to outputs[0], using the H / W / C stored in mPostprocess.
     // The work is queued on `stream`; `workspace` is unused and the function always returns 0.
     // NOTE(review): the removed version dispatched on mDataType (kFLOAT / kHALF); this
     // version always treats the input as float -- confirm FP16 inputs can no longer occur.
+    int PostprocessPluginV2::enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept
+    {
+        float* input = (float*)inputs[0];  // C-style cast also drops the const qualifier of inputs[0]
+        uint8_t* output = (uint8_t*)outputs[0];
 
-    const int H = mPostprocess.H;
-    const int W = mPostprocess.W;
-    const int C = mPostprocess.C;
+        const int H = mPostprocess.H;
+        const int W = mPostprocess.W;
+        const int C = mPostprocess.C;
 
-    if (mDataType == DataType::kFLOAT) {
-        const float* input = (const float*)inputs[0];
-        postprocess<float>(output, input, batchSize, H, W, C, stream);
-    } else if (mDataType == DataType::kHALF) {
-        const half* input = (const half*)inputs[0];
-        postprocess<half>(output, input, batchSize, H, W, C, stream);
-    }
+        postprocess(output, input, batchSize, H, W, C, stream);
 
-    return 0;
-}
-}  // namespace nvinfer1
+        return 0;
+    }
+}  // namespace nvinfer1