diff --git a/CMakeLists.txt b/CMakeLists.txt index ae92c2c..3741dab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,7 @@ add_library( src/main/native/cpp/preprocess.cc src/main/native/cpp/postprocess_v5.cc src/main/native/cpp/postprocess_v8_11.cc + src/main/native/cpp/postprocess_v11obb.cc ) target_link_libraries( diff --git a/cageconverted-640-640-yolov11obbn-labels.txt b/cageconverted-640-640-yolov11obbn-labels.txt new file mode 100755 index 0000000..258f6b4 --- /dev/null +++ b/cageconverted-640-640-yolov11obbn-labels.txt @@ -0,0 +1 @@ +cage diff --git a/cageconverted-640-640-yolov11obbn.rknn b/cageconverted-640-640-yolov11obbn.rknn new file mode 100755 index 0000000..46ffae5 Binary files /dev/null and b/cageconverted-640-640-yolov11obbn.rknn differ diff --git a/out2.png b/out2.png index e6a0670..9781081 100644 Binary files a/out2.png and b/out2.png differ diff --git a/src/main/java/org/photonvision/rknn/RknnJNI.java b/src/main/java/org/photonvision/rknn/RknnJNI.java index 77ff28e..45bbef0 100644 --- a/src/main/java/org/photonvision/rknn/RknnJNI.java +++ b/src/main/java/org/photonvision/rknn/RknnJNI.java @@ -18,23 +18,41 @@ package org.photonvision.rknn; import org.opencv.core.Point; -import org.opencv.core.Rect2d; +import org.opencv.core.RotatedRect; +import org.opencv.core.Size; public class RknnJNI { public static enum ModelVersion { YOLO_V5, YOLO_V8, - YOLO_V11 + YOLO_V11, + YOLO_V11OBB } public static class RknnResult { - public RknnResult(int left, int top, int right, int bottom, float conf, int class_id) { + /** + * Creates an RknnResult object representing one detected object + * + * @param cx The x position of the center point of the bounding box + * @param cy The y position of the center point of the bounding box + * @param width The width of the bounding rectangle + * @param height The height of the bounding rectangle + * @param angle The angle, in radians, in which the rectangle is rotated about its center point. + * 0 indicates an axis-aligned bbox + * @param conf The confidence of the detection + * @param class_id The class of the detection + */ + public RknnResult( + int cx, int cy, int width, int height, float angle, float conf, int class_id) { this.conf = conf; this.class_id = class_id; - this.rect = new Rect2d(new Point(left, top), new Point(right, bottom)); + + // OpenCV uses degrees + this.rect = + new RotatedRect(new Point(cx, cy), new Size(width, height), Math.toDegrees(angle)); } - public final Rect2d rect; + public final RotatedRect rect; public final float conf; public final int class_id; diff --git a/src/main/native/cpp/main_test.cc b/src/main/native/cpp/main_test.cc index c18dbad..63b47d1 100644 --- a/src/main/native/cpp/main_test.cc +++ b/src/main/native/cpp/main_test.cc @@ -17,6 +17,11 @@ int main_test(ModelVersion version) } else if (version == ModelVersion::YOLO_V11){ printf("Starting with version 11\n"); wrapper = new YoloV11Model("note-robot-yolov11s-quant.rknn", 3, 0); + } else if (version == ModelVersion::YOLO_V11OBB){ + printf("Starting with version 11OBB\n"); + wrapper = new YoloV11OBBModel("cageconverted-640-640-yolov11obbn.rknn", 1, 0); + // wrapper = new YoloV11OBBModel("best_yolo11obbn_cage_CONVERTED_FP32.rknn", 1, 0); + // wrapper = new YoloV11OBBModel("epochbestREAL-1024-1024-yolov11obbm.rknn", 2, 0); } else { printf("Unknown version\n"); return 1; @@ -26,7 +31,9 @@ int main_test(ModelVersion version) for (int j = 0; j < 1; j++) { cv::Mat img; - img = cv::imread("robots.png"); + img = cv::imread("test_cage_2_rotated.jpg"); + + // cv::resize(img, img, cv::Size(1024, 1024)); DetectionFilterParams params { .nms_thresh = 0.45, @@ -37,27 +44,39 @@ int main_test(ModelVersion version) std::cout << "Count: " << ret.count << std::endl; for (int i = 0; i < ret.count; ++i) { std::cout << "ID: " << ret.results[i].id << " conf " << ret.results[i].obj_conf << " @ " - << ret.results[i].box.top << " - " - << ret.results[i].box.left << " - " - << ret.results[i].box.bottom << " - " - << ret.results[i].box.right << " - " + << "cx: " << ret.results[i].obb.cx << " - " + << "cy: " << ret.results[i].obb.cy << " - " + << "width: " << ret.results[i].obb.width << " - " + << "height: " << ret.results[i].obb.height << " - " + << "angle: " << ret.results[i].obb.angle << " - " << std::endl; auto *det_result = &(ret.results[i]); - int x1 = det_result->box.left; - int y1 = det_result->box.top; - int x2 = det_result->box.right; - int y2 = det_result->box.bottom; + int cx = det_result->obb.cx; + int cy = det_result->obb.cy; + int w = det_result->obb.width; + int h = det_result->obb.height; + float angle_rad = det_result->obb.angle; + float angle_deg = angle_rad * 180.0f / CV_PI; // RotatedRect expects degrees + + cv::RotatedRect rrect(cv::Point2f((float)cx, (float)cy), cv::Size2f((float)w, (float)h), angle_deg); + + cv::Point2f pts2f[4]; + rrect.points(pts2f); // fill 4 corners + + std::vector box_pts(4); + for (int k = 0; k < 4; ++k) box_pts[k] = pts2f[k]; // round to ints - cv::rectangle( - img, cv::Rect(x1, y1, x2-x1, y2-y1), (0,255,0), 3 - ); + std::vector> contour(1, box_pts); + cv::drawContours(img, contour, 0, cv::Scalar(0, 255, 0), 3); + + // place label near top left of the rotated box + cv::Rect bbox = cv::boundingRect(box_pts); - // draw_rectangle(&src_image, x1, y1, x2 - x1, y2 - y1, COLOR_BLUE, 3); char text[256]; sprintf(text, "id%i %.1f%%", det_result->id, det_result->obj_conf * 100); - cv::putText(img, text, {x1, y1 + 10}, cv::FONT_ITALIC, 0.6, {100, 255, 0}); + cv::putText(img, text, {bbox.x, bbox.y + 10}, cv::FONT_ITALIC, 1.25, {100, 255, 0}); } cv::imwrite("out2.png", img); @@ -76,6 +95,10 @@ int main() { // threads.emplace_back(std::thread([]() {main_test("../note-640-640-yolov5s.rknn");})); // for (auto& th : threads) th.join(); - main_test(ModelVersion::YOLO_V8); + main_test(ModelVersion::YOLO_V11OBB); + // main_test(ModelVersion::YOLO_V11); + // main_test(ModelVersion::YOLO_V8); // main_test(ModelVersion::YOLO_V5); + + return 0; } diff --git a/src/main/native/cpp/postprocess_v11obb.cc b/src/main/native/cpp/postprocess_v11obb.cc new file mode 100644 index 0000000..636a84c --- /dev/null +++ b/src/main/native/cpp/postprocess_v11obb.cc @@ -0,0 +1,361 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "yolov11obb/postprocess_v11obb.h" + +using std::vector; +using std::min; +using std::max; + + +// --- Helper functions --- + +// Sigmoid function +static inline float sigmoid(float x) { + return 1.0f / (1.0f + std::exp(-x)); +} + +// Compute softmax for an array of length len +static vector softmax(const float* data, int len) { + vector out(len); + float max_val = -std::numeric_limits::infinity(); + for (int i = 0; i < len; i++) { + if (data[i] > max_val) + max_val = data[i]; + } + float sum = 0.f; + for (int i = 0; i < len; i++) { + out[i] = std::exp(data[i] - max_val); + sum += out[i]; + } + for (int i = 0; i < len; i++) { + out[i] /= sum; + } + return out; +} + +// Rotate an axis-aligned rectangle defined by (x1,y1) and (x2,y2) by angle (in radians). +// Returns four points representing the rotated rectangle in the following order: +// [ rotation(x1,y1), rotation(x1,y2), rotation(x2,y2), rotation(x2,y1) ] +static vector rotate_rectangle(float x1, float y1, float x2, float y2, float angle) { + float cx = (x1 + x2) / 2.f; + float cy = (y1 + y2) / 2.f; + float cosA = std::cos(angle); + float sinA = std::sin(angle); + + vector pts(4); + pts[0] = cv::Point2f((x1 - cx) * cosA - (y1 - cy) * sinA + cx, + (x1 - cx) * sinA + (y1 - cy) * cosA + cy); + pts[1] = cv::Point2f((x1 - cx) * cosA - (y2 - cy) * sinA + cx, + (x1 - cx) * sinA + (y2 - cy) * cosA + cy); + pts[2] = cv::Point2f((x2 - cx) * cosA - (y2 - cy) * sinA + cx, + (x2 - cx) * sinA + (y2 - cy) * cosA + cy); + pts[3] = cv::Point2f((x2 - cx) * cosA - (y1 - cy) * sinA + cx, + (x2 - cx) * sinA + (y1 - cy) * cosA + cy); + + // To mimic the Python order ([pt0, pt3, pt1, pt2]): + vector rotated; + rotated.push_back(pts[0]); + rotated.push_back(pts[3]); + rotated.push_back(pts[2]); + rotated.push_back(pts[1]); + return rotated; +} + +// Compute IoU between two rotated boxes given as a polygon (list of 4 cv::Point2f). +static float computeIoU(const vector& poly1, const vector& poly2) { + // Compute area for each polygon using contourArea + float area1 = std::fabs(cv::contourArea(poly1)); + float area2 = std::fabs(cv::contourArea(poly2)); + + // Compute intersection polygon using OpenCV's convex polygon intersection + vector interPoly; + + // intersectConvexConvex returns the intersection area. + float interArea = cv::intersectConvexConvex(poly1, poly2, interPoly, true); + float unionArea = area1 + area2 - interArea; + if (unionArea <= 0.f) + return 0.f; + + return interArea / unionArea; +} + +// Structure to hold intermediate detection results +struct DetectBox { + int classId; + float score; + float xmin; + float ymin; + float xmax; + float ymax; + float angle; // in radians + bool suppressed; // flag for NMS + DetectBox(int cid, float s, float x1, float y1, float x2, float y2, float a) + : classId(cid), score(s), xmin(x1), ymin(y1), xmax(x2), ymax(y2), angle(a), suppressed(false) {} +}; + +// This function processes the raw RKNN outputs (assuming three detection branches and one angle branch) +// to produce oriented bounding boxes +int post_process_v11obb(cv::Size modelSize, + rknn_output *outputs, + BOX_RECT *padding, + float conf_threshold, + float nms_threshold, + detect_result_group_t *od_results, + int numClasses, + std::vector &output_attrs, + bool is_quant, + int n_outputs) { + if(n_outputs < 4) { + return -1; // error: insufficient outputs + } + + // We'll accumulate detection boxes from all detection branches here. + vector detBoxes; + + // --- Angle branch: assume the LAST output is the angle tensor + const int angle_out_idx = n_outputs - 1; + const rknn_tensor_attr& angle_attr = output_attrs[angle_out_idx]; + + // Data pointers for angle tensor + const float* angle_data_f = nullptr; + const int8_t* angle_data_q = nullptr; + if (is_quant) { + angle_data_q = reinterpret_cast(outputs[angle_out_idx].buf); + } else { + angle_data_f = reinterpret_cast(outputs[angle_out_idx].buf); + } + + // We'll treat the angle tensor as a single flattened buffer. + // As we iterate detection heads (branches), we advance this offset by H*W. + size_t angle_offset = 0; + + // --- Process each detection head; assume all but the last are box+class heads + for (int branch = 0; branch < angle_out_idx; ++branch) { + const rknn_tensor_attr& attr = output_attrs[branch]; + + const int N = (attr.n_dims > 0) ? static_cast(attr.dims[0]) : 1; + const int C = (attr.n_dims > 1) ? static_cast(attr.dims[1]) : 0; + const int H = (attr.n_dims > 2) ? static_cast(attr.dims[2]) : 0; + const int W = (attr.n_dims > 3) ? static_cast(attr.dims[3]) : 0; + + if (N != 1 || C <= 0 || H <= 0 || W <= 0) { + // skip this head safely + continue; + } + + const int spatial_size = H * W; + + // --- Infer layout: channels = loc_channels + numClasses + const int loc_channels = C - numClasses; + if (loc_channels < 4 || (loc_channels % 4) != 0) { + // Not a 4 layout; skip + continue; + } + + const int bins_per_side = loc_channels / 4; // 16 normally + + // Confidence starts at channel = loc_channels + auto conf_channel_of = [&](int cls) { return loc_channels + cls; }; + + // Decode stride from feature map scale + int stride = modelSize.height / H; + if (stride <= 0) stride = 1; + + // Pointers to detection head data + const float* det_data_f = nullptr; + const int8_t* det_data_q = nullptr; + if (is_quant) { + det_data_q = reinterpret_cast(outputs[branch].buf); + } else { + det_data_f = reinterpret_cast(outputs[branch].buf); + } + + // Iterate all cells × classes + for (int cls = 0; cls < numClasses; ++cls) { + const int conf_ch = conf_channel_of(cls); + // Walk H×W + for (int idx = 0; idx < spatial_size; ++idx) { + const int h_idx = idx / W; + const int w_idx = idx % W; + + // --- confidence + const int conf_index = conf_ch * spatial_size + idx; + float conf_val = 0.f; + if (is_quant) { + conf_val = (static_cast(det_data_q[conf_index]) - attr.zp) * attr.scale; + } else { + conf_val = det_data_f[conf_index]; + } + conf_val = sigmoid(conf_val); + if (conf_val < conf_threshold) continue; + + // --- DFL decode for 4 coords with bins_per_side + float coords[4] = {0.f, 0.f, 0.f, 0.f}; + for (int i = 0; i < 4; ++i) { + // gather logits for this side: channels [i*bins .. i*bins+bins-1] at (h,w) + std::vector logits(bins_per_side); + for (int b = 0; b < bins_per_side; ++b) { + const int ch = i * bins_per_side + b; + const int off = ch * spatial_size + idx; + float v = 0.f; + if (is_quant) { + v = (static_cast(det_data_q[off]) - attr.zp) * attr.scale; + } else { + v = det_data_f[off]; + } + logits[b] = v; + } + // softmax -> probs + std::vector probs = softmax(logits.data(), bins_per_side); + // soft-argmax + float s = 0.f; + for (int b = 0; b < bins_per_side; ++b) s += b * probs[b]; + coords[i] = s; + } + + // sums and halves + const float xywh_add0 = coords[0] + coords[2]; + const float xywh_add1 = coords[1] + coords[3]; + const float xywh_sub0 = (coords[2] - coords[0]) * 0.5f; + const float xywh_sub1 = (coords[3] - coords[1]) * 0.5f; + + // --- angle from the flattened angle tensor + const int angle_index = static_cast(angle_offset) + idx; // idx = h*W + w + float angle_raw = 0.f; + if (is_quant) { + angle_raw = (static_cast(angle_data_q[angle_index]) - angle_attr.zp) * angle_attr.scale; + } else { + angle_raw = angle_data_f[angle_index]; + } + const float angle_feature = (angle_raw - 0.25f) * 3.14159265358979323846f; + + const float cos_a = std::cos(angle_feature); + const float sin_a = std::sin(angle_feature); + + const float xy_mul1 = xywh_sub0 * cos_a; + const float xy_mul2 = xywh_sub1 * sin_a; + const float xy_mul3 = xywh_sub0 * sin_a; + const float xy_mul4 = xywh_sub1 * cos_a; + + float center_x = (xy_mul1 - xy_mul2) + w_idx + 0.5f; + float center_y = (xy_mul3 + xy_mul4) + h_idx + 0.5f; + + center_x *= stride; + center_y *= stride; + + const float box_w = xywh_add0 * stride; + const float box_h = xywh_add1 * stride; + + const float xmin = center_x - box_w * 0.5f; + const float ymin = center_y - box_h * 0.5f; + const float xmax = center_x + box_w * 0.5f; + const float ymax = center_y + box_h * 0.5f; + + detBoxes.emplace_back(cls, conf_val, xmin, ymin, xmax, ymax, angle_feature); + } // cells + } // classes + + // advance angle offset by this map's spatial size + angle_offset += static_cast(spatial_size); + } // branch loop + + // --- NMS + // Sort detections in descending order by confidence. + std::sort(detBoxes.begin(), detBoxes.end(), [](const DetectBox &a, const DetectBox &b) { + return a.score > b.score; + }); + + vector finalDetections; + for (size_t i = 0; i < detBoxes.size(); i++) { + if (detBoxes[i].suppressed) + continue; + + // Get rotated polygon for current box + vector poly1 = rotate_rectangle(detBoxes[i].xmin, detBoxes[i].ymin, + detBoxes[i].xmax, detBoxes[i].ymax, + detBoxes[i].angle); + finalDetections.push_back(detBoxes[i]); + + // Compare with later detections + for (size_t j = i + 1; j < detBoxes.size(); j++) { + if (detBoxes[i].classId != detBoxes[j].classId) + continue; + if (detBoxes[j].suppressed) + continue; + + vector poly2 = rotate_rectangle(detBoxes[j].xmin, detBoxes[j].ymin, + detBoxes[j].xmax, detBoxes[j].ymax, + detBoxes[j].angle); + + float iou = computeIoU(poly1, poly2); + if (iou > nms_threshold) { + detBoxes[j].suppressed = true; + } + } + } + + // --- Fix up OBBs by adjusting the padding + od_results->results.clear(); + + // Precompute + const float content_w = static_cast(padding->right - padding->left); + const float content_h = static_cast(padding->bottom - padding->top); + const float widthScale = content_w / static_cast(modelSize.width); + const float heightScale = content_h / static_cast(modelSize.height); + + for (size_t i = 0; i < finalDetections.size(); i++) { + // don't output more detections than the + // hard limit + if (i >= OBJ_NUMB_MAX_SIZE_V11OBB) { + break; + } + + const DetectBox& d = finalDetections[i]; + + float cx = 0.5f * (d.xmin + d.xmax); + float cy = 0.5f * (d.ymin + d.ymax); + float w = (d.xmax - d.xmin); + float h = (d.ymax - d.ymin); + float th = d.angle; // radians + + // Remove letterbox offsets then scale + cx -= padding->left; + cy -= padding->top; + + cx *= widthScale; + cy *= heightScale; + w *= widthScale; + h *= heightScale; + + // Clamp to content dimensions (center within image; keep width/height >= 1 pixel) + cx = std::max(0.f, std::min(cx, content_w - 1.f)); + cy = std::max(0.f, std::min(cy, content_h - 1.f)); + w = std::max(1.f, std::min(w, content_w)); + h = std::max(1.f, std::min(h, content_h)); + + detect_result_t result; + result.id = d.classId; + result.obj_conf = d.score; + + result.obb.cx = static_cast(std::round(cx)); + result.obb.cy = static_cast(std::round(cy)); + result.obb.width = static_cast(std::round(w)); + result.obb.height = static_cast(std::round(h)); + + result.obb.angle = th; // radians about (center_x, center_y) + + od_results->results.push_back(result); + } + + od_results->count = static_cast(od_results->results.size()); + + return 0; +} \ No newline at end of file diff --git a/src/main/native/cpp/postprocess_v5.cc b/src/main/native/cpp/postprocess_v5.cc index 0f66fd8..89cf8f1 100644 --- a/src/main/native/cpp/postprocess_v5.cc +++ b/src/main/native/cpp/postprocess_v5.cc @@ -323,10 +323,18 @@ int post_process_v5(int8_t *input0, int8_t *input1, int8_t *input2, int model_in float obj_conf = objProbs[i]; detect_result_t det; - det.box.left = (int)(clamp(x1, 0, model_in_w) / scale_w); - det.box.top = (int)(clamp(y1, 0, model_in_h) / scale_h); - det.box.right = (int)(clamp(x2, 0, model_in_w) / scale_w); - det.box.bottom = (int)(clamp(y2, 0, model_in_h) / scale_h); + + int resLeft = (int)(clamp(x1, 0, model_in_w) / scale_w); + int resTop = (int)(clamp(y1, 0, model_in_h) / scale_h); + int resRight = (int)(clamp(x2, 0, model_in_w) / scale_w); + int resBottom = (int)(clamp(y2, 0, model_in_h) / scale_h); + + det.obb.cx = resLeft + ((resRight - resLeft) / 2); + det.obb.cy = resTop + ((resBottom - resTop) / 2); + det.obb.width = resRight - resLeft; + det.obb.height = resBottom - resTop; + det.obb.angle = 0.0f; + det.obj_conf = obj_conf; det.id = id; group->results.push_back(det); diff --git a/src/main/native/cpp/postprocess_v8_11.cc b/src/main/native/cpp/postprocess_v8_11.cc index 3411d9a..dedb50a 100644 --- a/src/main/native/cpp/postprocess_v8_11.cc +++ b/src/main/native/cpp/postprocess_v8_11.cc @@ -447,10 +447,18 @@ int post_process_v8_11(cv::Size modelSize, rknn_output *outputs, BOX_RECT *paddi float obj_conf = objProbs[i]; detect_result_t det; - det.box.left = (int)(clamp(x1, 0, model_in_w) * widthScale); - det.box.top = (int)(clamp(y1, 0, model_in_h) * heightScale); - det.box.right = (int)(clamp(x2, 0, model_in_w) * widthScale); - det.box.bottom = (int)(clamp(y2, 0, model_in_h) * heightScale); + + int resLeft = (int)(clamp(x1, 0, model_in_w) * widthScale); + int resTop = (int)(clamp(y1, 0, model_in_h) * heightScale); + int resRight = (int)(clamp(x2, 0, model_in_w) * widthScale); + int resBottom = (int)(clamp(y2, 0, model_in_h) * heightScale); + + det.obb.cx = resLeft + ((resRight - resLeft) / 2); + det.obb.cy = resTop + ((resBottom - resTop) / 2); + det.obb.width = resRight - resLeft; + det.obb.height = resBottom - resTop; + det.obb.angle = 0.0f; + det.obj_conf = obj_conf; det.id = id; od_results->results.push_back(det); diff --git a/src/main/native/cpp/rknn_jni.cpp b/src/main/native/cpp/rknn_jni.cpp index 58a54e7..b0475df 100644 --- a/src/main/native/cpp/rknn_jni.cpp +++ b/src/main/native/cpp/rknn_jni.cpp @@ -45,12 +45,12 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) { static jobject MakeJObject(JNIEnv *env, const detect_result_t &result) { jmethodID constructor = - env->GetMethodID(detectionResultClass, "", "(IIIIFI)V"); + env->GetMethodID(detectionResultClass, "", "(IIIIFFI)V"); // Actually call the constructor - return env->NewObject(detectionResultClass, constructor, result.box.left, - result.box.top, result.box.right, result.box.bottom, - result.obj_conf, result.id); + return env->NewObject(detectionResultClass, constructor, result.obb.cx, + result.obb.cy, result.obb.width, result.obb.height, + result.obb.angle, result.obj_conf, result.id); } /* @@ -76,6 +76,9 @@ Java_org_photonvision_rknn_RknnJNI_create } else if (static_cast(modelVer) == ModelVersion::YOLO_V11) { std::printf("Starting with version 11\n"); ret = new YoloV11Model(nativeString, numClasses, coreNum); + } else if (static_cast(modelVer) == ModelVersion::YOLO_V11OBB) { + std::printf("Starting with version 11OBB\n"); + ret = new YoloV11OBBModel(nativeString, numClasses, coreNum); } else { std::printf("Unknown version\n"); return 0; diff --git a/src/main/native/cpp/yolo_common.cpp b/src/main/native/cpp/yolo_common.cpp index 81a7160..33cc3e9 100644 --- a/src/main/native/cpp/yolo_common.cpp +++ b/src/main/native/cpp/yolo_common.cpp @@ -26,6 +26,7 @@ #include "im2d.h" #include "preprocess.h" #include "yolov8_11/postprocess_v8_11.h" +#include "yolov11obb/postprocess_v11obb.h" static unsigned char *load_data(std::FILE *fp, size_t ofst, size_t sz) { unsigned char *data; @@ -355,3 +356,23 @@ YoloV11Model::postProcess(std::vector outputs, return result; } + +detect_result_group_t +YoloV11OBBModel::postProcess(std::vector outputs, + DetectionFilterParams params, cv::Size inputImageSize, + cv::Size2d imageScale, BOX_RECT letterbox) { + detect_result_group_t result; + + BOX_RECT padding{ + 0, + inputImageSize.width, + 0, + inputImageSize.height, + }; + + post_process_v11obb(modelSize, outputs.data(), &padding, params.box_thresh, + params.nms_thresh, &result, numClasses, output_attrs, + is_quant, io_num.n_output); + + return result; +} diff --git a/src/main/native/include/yolo_common.hpp b/src/main/native/include/yolo_common.hpp index 3105da3..9349c12 100644 --- a/src/main/native/include/yolo_common.hpp +++ b/src/main/native/include/yolo_common.hpp @@ -30,7 +30,7 @@ typedef struct { double box_thresh; } DetectionFilterParams; -enum ModelVersion { YOLO_V5, YOLO_V8, YOLO_V11 }; +enum ModelVersion { YOLO_V5, YOLO_V8, YOLO_V11, YOLO_V11OBB }; class YoloModel { public: @@ -108,3 +108,17 @@ class YoloV11Model : public YoloModel { cv::Size2d inputImageScale, BOX_RECT letterbox); }; + +class YoloV11OBBModel : public YoloModel { +public: + YoloV11OBBModel(std::string modelPath, int num_classes_, int coreNumber) + : YoloModel(modelPath, num_classes_, ModelVersion::YOLO_V11OBB, coreNumber) { + } + +protected: + detect_result_group_t postProcess(std::vector output, + DetectionFilterParams params, + cv::Size inputImageSize, + cv::Size2d inputImageScale, + BOX_RECT letterbox); +}; diff --git a/src/main/native/include/yolov11obb/postprocess_v11obb.h b/src/main/native/include/yolov11obb/postprocess_v11obb.h new file mode 100644 index 0000000..53495c1 --- /dev/null +++ b/src/main/native/include/yolov11obb/postprocess_v11obb.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) Photon Vision. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef RKNN_JNI_SRC_MAIN_NATIVE_INCLUDE_YOLOV11OBB_POSTPROCESS_V11OBB_H_ +#define RKNN_JNI_SRC_MAIN_NATIVE_INCLUDE_YOLOV11OBB_POSTPROCESS_V11OBB_H_ + +#include + +#include + +#include + +#include "common.h" +#include "rknn_api.h" +#include "yolov5/postprocess_v5.h" + +#define OBJ_NUMB_MAX_SIZE_V11OBB 128 + +// #define OBJ_CLASS_NUM 3 +// #define NMS_THRESH 0.45 +// #define BOX_THRESH 0.25 + +// int init_post_process(); +// void deinit_post_process(); +// char *coco_cls_to_name(int cls_id); +int post_process_v11obb(cv::Size modelSize, rknn_output *outputs, + BOX_RECT *letter_box, float conf_threshold, + float nms_threshold, detect_result_group_t *od_results, + int numClasses, + std::vector &output_attrs, + bool is_quant, int n_outputs); + +#endif // RKNN_JNI_SRC_MAIN_NATIVE_INCLUDE_YOLOV11OBB_POSTPROCESS_V11OBB_H_ diff --git a/src/main/native/include/yolov5/postprocess_v5.h b/src/main/native/include/yolov5/postprocess_v5.h index b8e7fd0..eaf200c 100644 --- a/src/main/native/include/yolov5/postprocess_v5.h +++ b/src/main/native/include/yolov5/postprocess_v5.h @@ -36,9 +36,17 @@ typedef struct _BOX_RECT { int bottom; } BOX_RECT; +typedef struct _ROTATED_BOX_RECT { + int cx; + int cy; + int width; + int height; + float angle; +} ROTATED_BOX_RECT; + typedef struct __detect_result_t { int id; - BOX_RECT box; + ROTATED_BOX_RECT obb; float obj_conf; } detect_result_t; diff --git a/test_cage_2_rotated.jpg b/test_cage_2_rotated.jpg new file mode 100644 index 0000000..e306718 Binary files /dev/null and b/test_cage_2_rotated.jpg differ