Skip to content

Commit e29066e

Browse files
Yolo26-Cls Added (#1704)
1 parent 664f222 commit e29066e

File tree

7 files changed

+402
-8
lines changed

7 files changed

+402
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ The basic workflow of TensorRTx is:
1717

1818
## News
1919

20-
- `2 Feb 2026`. [fazligorkembal](https://github.com/fazligorkembal) Yolo26-Det, Yolo26-Obb
20+
- `2 Feb 2026`. [fazligorkembal](https://github.com/fazligorkembal) Yolo26-Det, Yolo26-Obb, Yolo26-Cls
2121
- `15 Jan 2026`. [zgjja](https://github.com/zgjja) Refactor multiple old CV models to support TensorRT SDK through 7~10.
2222
- `8 Jan 2026`. [ydk61](https://github.com/ydk61): YOLOv13
2323
- `10 May 2025`. [pranavm-nvidia](https://github.com/pranavm-nvidia): [YOLO11](./yolo11_tripy) writen in [Tripy](https://github.com/NVIDIA/TensorRT-Incubator/tree/main/tripy).

yolo26/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,10 @@ add_executable(yolo26_obb ${PROJECT_SOURCE_DIR}/yolo26_obb.cpp ${SRCS})
4848
target_link_libraries(yolo26_obb nvinfer)
4949
target_link_libraries(yolo26_obb cudart)
5050
target_link_libraries(yolo26_obb yololayerplugins)
51-
target_link_libraries(yolo26_obb ${OpenCV_LIBS})
51+
target_link_libraries(yolo26_obb ${OpenCV_LIBS})
52+
53+
# Classification demo executable and its link dependencies.
add_executable(yolo26_cls ${PROJECT_SOURCE_DIR}/yolo26_cls.cpp ${SRCS})
target_link_libraries(yolo26_cls nvinfer cudart yololayerplugins ${OpenCV_LIBS})

yolo26/README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Training code [link](https://github.com/ultralytics/ultralytics/archive/refs/tag
1616

1717
* [] Yolo26n-det, Yolo26s-det, Yolo26m-det, Yolo26l-det, Yolo26sx-det, support FP32/FP16 and C++ API
1818
* [] Yolo26n-obb, Yolo26s-obb, Yolo26m-obb, Yolo26l-obb, Yolo26sx-obb, support FP32/FP16 and C++ API
19+
* [] Yolo26n-cls, Yolo26s-cls, Yolo26m-cls, Yolo26l-cls, Yolo26sx-cls, support FP32/FP16 and C++ API
1920

2021
## COMING FEATURES
2122
* [] Windows OS Support
@@ -54,6 +55,13 @@ cp [PATH-TO-MAIN-FOLDER]/gen_wts.py .
5455
python gen_wts.py -w yolo26n-obb.pt -o yolo26n-obb.wts -t obb
5556
# A file 'yolo26n-obb.wts' will be generated.
5657

58+
# Download models for Cls
59+
wget https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-cls.pt -O yolo26n-cls.pt # to download other models, replace 'yolo26n-cls.pt' with 'yolo26s-cls.pt', 'yolo26m-cls.pt', 'yolo26l-cls.pt' or 'yolo26x-cls.pt'
60+
# Generate .wts
61+
cp [PATH-TO-MAIN-FOLDER]/gen_wts.py .
62+
python gen_wts.py -w yolo26n-cls.pt -o yolo26n-cls.wts -t cls
63+
# A file 'yolo26n-cls.wts' will be generated.
64+
5765
```
5866

5967
2. build and run
@@ -81,7 +89,20 @@ cp [PATH-TO-ultralytics]/yolo26n-obb.wts .
8189
# Build and serialize TensorRT engine
8290
./yolo26_obb -s yolo26n-obb.wts yolo26n-obb.engine [n/s/m/l/x]
8391
# Run inference
84-
./yolo26_obb -d yolo26n.engine ../images
92+
./yolo26_obb -d yolo26n-obb.engine ../images
93+
# results saved in build directory
94+
```
95+
96+
### Cls
97+
```shell
98+
# Generate the classification labels file in the build folder, or download it:
99+
# wget https://raw.githubusercontent.com/joannzhang00/ImageNet-dataset-classes-labels/main/imagenet_classes.txt
100+
101+
cp [PATH-TO-ultralytics]/yolo26n-cls.wts .
102+
# Build and serialize TensorRT engine
103+
./yolo26_cls -s yolo26n-cls.wts yolo26n-cls.engine [n/s/m/l/x]
104+
# Run inference
105+
./yolo26_cls -d yolo26n-cls.engine ../images
85106
# results saved in build directory
86107
```
87108

yolo26/include/model.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,8 @@ nvinfer1::IHostMemory* buildEngineYolo26Det(nvinfer1::IBuilder* builder, nvinfer
1010

1111
nvinfer1::IHostMemory* buildEngineYolo26Obb(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
1212
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
13-
int& max_channels, std::string& type);
13+
int& max_channels, std::string& type);
14+
15+
nvinfer1::IHostMemory* buildEngineYolo26Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
16+
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
17+
int& max_channels, std::string& type);

yolo26/include/utils.h

100644100755
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,22 @@ static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::str
4747
return 0;
4848
}
4949

50+
// Reads class labels from a text file, one label per line (e.g. imagenet_classes.txt).
// Used by the cls demo to map a class index to a human-readable name.
//   file_name : path to the labels file.
// Returns the labels in file order. If the file cannot be opened, prints an error and
// asserts; in NDEBUG builds the assert is compiled out and an empty vector is returned.
inline std::vector<std::string> read_classes(const std::string& file_name) {
    std::vector<std::string> classes;
    std::ifstream ifs(file_name, std::ios::in);
    if (!ifs.is_open()) {
        std::cerr << file_name << " is not found, pls refer to README and download it." << std::endl;
        assert(0);  // NOTE: no-op under NDEBUG; callers then see an empty vector.
    }
    std::string s;
    while (std::getline(ifs, s)) {
        // Strip a trailing '\r' so labels from CRLF (Windows-edited) files compare
        // equal to plain strings and print without stray carriage returns.
        if (!s.empty() && s.back() == '\r') {
            s.pop_back();
        }
        classes.push_back(s);
    }
    return classes;  // ifstream closes itself on destruction (RAII).
}
65+
5066
// Function to trim leading and trailing whitespace from a string
5167
static inline std::string trim_leading_whitespace(const std::string& str) {
5268
size_t first = str.find_first_not_of(' ');

yolo26/src/model.cpp

Lines changed: 112 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,9 @@ nvinfer1::IHostMemory* buildEngineYolo26Det(nvinfer1::IBuilder* builder, nvinfer
8787
C3K2(network, weightMap, *block7->getOutput(0), get_width(1024, gw, max_channels),
8888
get_width(1024, gw, max_channels), get_depth(2, gd), true, true, false, 0.5, "model.8");
8989

90-
nvinfer1::IElementWiseLayer* block9 = SPPF(network, weightMap, *block8->getOutput(0),
91-
get_width(1024, gw, max_channels), get_width(1024, gw, max_channels), 5,
92-
true, "model.9"); // TODO: VERIFY THIS BLOCK FOR OTHER YOLO26 MODELS
90+
nvinfer1::IElementWiseLayer* block9 =
91+
SPPF(network, weightMap, *block8->getOutput(0), get_width(1024, gw, max_channels),
92+
get_width(1024, gw, max_channels), 5, true, "model.9");
9393

9494
nvinfer1::IElementWiseLayer* block10 =
9595
C2PSA(network, weightMap, *block9->getOutput(0), get_width(1024, gw, max_channels),
@@ -869,4 +869,112 @@ nvinfer1::IHostMemory* buildEngineYolo26Obb(nvinfer1::IBuilder* builder, nvinfer
869869
free((void*)(mem.second.values));
870870
}
871871
return serialized_model;
872-
}
872+
}
873+
874+
// Builds and serializes a TensorRT engine for the YOLO26 classification model.
//
//   builder, config : TensorRT builder objects owned and released by the caller.
//   dt              : data type of the input tensor (e.g. nvinfer1::DataType::kFLOAT).
//   wts_path        : path to the .wts weight file produced by gen_wts.py.
//   gd, gw          : depth / width multipliers selecting the model scale.
//   max_channels    : channel cap forwarded to get_width().
//   type            : model size letter ("n"/"s"/"m"/"l"/"x"); m/l/x enable c3k bottlenecks.
//
// Returns the serialized engine; the caller owns the returned IHostMemory.
nvinfer1::IHostMemory* buildEngineYolo26Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                                            nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
                                            int& max_channels, std::string& type) {
    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights(wts_path);

    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(
            1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));

    /*******************************************************************************************************
    ****************************************** YOLO26 INPUT **********************************************
    *******************************************************************************************************/

    // Classification uses its own input resolution (kClsInputH x kClsInputW), not the detection one.
    nvinfer1::ITensor* data =
            network->addInput(kInputTensorName, dt, nvinfer1::Dims4{kBatchSize, 3, kClsInputH, kClsInputW});
    assert(data);

    /*******************************************************************************************************
    ***************************************** YOLO26 BACKBONE ********************************************
    *******************************************************************************************************/

    nvinfer1::IElementWiseLayer* block0 =
            convBnSiLU(network, weightMap, *data, get_width(64, gw, max_channels), {3, 3}, 2, "model.0");

    nvinfer1::IElementWiseLayer* block1 = convBnSiLU(network, weightMap, *block0->getOutput(0),
                                                     get_width(128, gw, max_channels), {3, 3}, 2, "model.1");

    // The m/l/x variants use the heavier c3k bottleneck inside the first two C3K2 stages.
    bool c3k = false;
    if (type == "m" || type == "l" || type == "x") {
        c3k = true;
    }

    nvinfer1::IElementWiseLayer* block2 =
            C3K2(network, weightMap, *block1->getOutput(0), get_width(128, gw, max_channels),
                 get_width(256, gw, max_channels), get_depth(2, gd), c3k, true, false, 0.25, "model.2");

    nvinfer1::IElementWiseLayer* block3 = convBnSiLU(network, weightMap, *block2->getOutput(0),
                                                     get_width(256, gw, max_channels), {3, 3}, 2, "model.3");

    nvinfer1::IElementWiseLayer* block4 =
            C3K2(network, weightMap, *block3->getOutput(0), get_width(256, gw, max_channels),
                 get_width(512, gw, max_channels), get_depth(2, gd), c3k, true, false, 0.25, "model.4");

    nvinfer1::IElementWiseLayer* block5 = convBnSiLU(network, weightMap, *block4->getOutput(0),
                                                     get_width(512, gw, max_channels), {3, 3}, 2, "model.5");

    nvinfer1::IElementWiseLayer* block6 =
            C3K2(network, weightMap, *block5->getOutput(0), get_width(512, gw, max_channels),
                 get_width(512, gw, max_channels), get_depth(2, gd), true, true, false, 0.5, "model.6");

    nvinfer1::IElementWiseLayer* block7 = convBnSiLU(network, weightMap, *block6->getOutput(0),
                                                     get_width(1024, gw, max_channels), {3, 3}, 2, "model.7");

    nvinfer1::IElementWiseLayer* block8 =
            C3K2(network, weightMap, *block7->getOutput(0), get_width(1024, gw, max_channels),
                 get_width(1024, gw, max_channels), get_depth(2, gd), true, true, false, 0.5, "model.8");

    // model.9 is C2PSA directly — the cls backbone has no SPPF stage (unlike Det/Obb).
    nvinfer1::IElementWiseLayer* block9 =
            C2PSA(network, weightMap, *block8->getOutput(0), get_width(1024, gw, max_channels),
                  get_width(1024, gw, max_channels), get_depth(2, gd), 0.5, "model.9");

    /*******************************************************************************************************
    ************************************** YOLO26 CLASSIFY HEAD ******************************************
    *******************************************************************************************************/

    // model.10: 1x1 conv to a fixed 1280 channels, global average pool, flatten, linear, sigmoid.
    nvinfer1::IElementWiseLayer* block10_convbn =
            convBnSiLU(network, weightMap, *block9->getOutput(0), 1280, {1, 1}, 1, "model.10.conv");
    nvinfer1::Dims dims =
            block10_convbn->getOutput(0)->getDimensions();  // NCHW dims of the conv output
    assert(dims.nbDims == 4);
    // Global average pooling: the pool window covers the whole spatial extent.
    nvinfer1::IPoolingLayer* block10_pool = network->addPoolingNd(
            *block10_convbn->getOutput(0), nvinfer1::PoolingType::kAVERAGE, nvinfer1::DimsHW{dims.d[2], dims.d[3]});
    // Flatten {N, 1280, 1, 1} -> {N, 1280} for the matrix multiply.
    nvinfer1::IShuffleLayer* block10_reshape = network->addShuffle(*block10_pool->getOutput(0));
    block10_reshape->setReshapeDimensions(nvinfer1::Dims2{kBatchSize, 1280});
    nvinfer1::IConstantLayer* block10_linear_weight =
            network->addConstant(nvinfer1::Dims2{kClsNumClass, 1280}, weightMap["model.10.linear.weight"]);
    // Bias must be {1, kClsNumClass} so the elementwise kSUM broadcasts it per batch row onto the
    // {kBatchSize, kClsNumClass} matmul output. The previous {kClsNumClass, 1} shape broadcast the
    // sum to a {kClsNumClass, kClsNumClass} tensor, applying the wrong bias to each class.
    nvinfer1::IConstantLayer* block10_linear_bias =
            network->addConstant(nvinfer1::Dims2{1, kClsNumClass}, weightMap["model.10.linear.bias"]);
    nvinfer1::IMatrixMultiplyLayer* block10_linear_matrix_multiply =
            network->addMatrixMultiply(*block10_reshape->getOutput(0), nvinfer1::MatrixOperation::kNONE,
                                       *block10_linear_weight->getOutput(0), nvinfer1::MatrixOperation::kTRANSPOSE);
    nvinfer1::IElementWiseLayer* block10_linear_add =
            network->addElementWise(*block10_linear_matrix_multiply->getOutput(0), *block10_linear_bias->getOutput(0),
                                    nvinfer1::ElementWiseOperation::kSUM);
    // NOTE(review): ultralytics applies softmax to cls logits at inference time; confirm the
    // cls demo's postprocess expects sigmoid scores (argmax is unaffected either way).
    nvinfer1::IActivationLayer* output =
            network->addActivation(*block10_linear_add->getOutput(0), nvinfer1::ActivationType::kSIGMOID);
    assert(output);

    output->getOutput(0)->setName(kOutputTensorName);
    network->markOutput(*output->getOutput(0));
    // Use setMemoryPoolLimit instead of deprecated setMaxWorkspaceSize
    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 16 * (1 << 20));

#if defined(USE_FP16)
    config->setFlag(nvinfer1::BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cerr << "INT8 not supported for YOLO26 model yet." << std::endl;
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    nvinfer1::IHostMemory* serialized_model = builder->buildSerializedNetwork(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    delete network;

    // Weight buffers were malloc'd by loadWeights(); free them now that the engine is serialized.
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }
    return serialized_model;
}

0 commit comments

Comments
 (0)