diff --git a/samples/linux/README.md b/samples/linux/README.md index 6d489ef..7f19fde 100644 --- a/samples/linux/README.md +++ b/samples/linux/README.md @@ -159,6 +159,7 @@ The following table lists all available sample models with descriptions and exec | wideresnet50 | WideResNet50 image classification | `python wideresnet50/wideresnet50.py` | | xlsr | XLSR super-resolution | `python xlsr/xlsr.py` | | yolov8_det | YOLOv8 object detection | `python yolov8_det/yolov8_det.py` | +| yolo26n_det | YOLO26n object detection | `python yolo26n_det/yolo26n_det.py` | > **Note:** Ensure you are in the `samples/linux/python` directory before running any example. Each sample will automatically download its required model on first run. diff --git a/samples/linux/python/yolo26n_det/README.md b/samples/linux/python/yolo26n_det/README.md new file mode 100644 index 0000000..fc344a2 --- /dev/null +++ b/samples/linux/python/yolo26n_det/README.md @@ -0,0 +1,59 @@ +# yolo26n_det Sample Code + +## Introduction +This is sample code for using AppBuilder to load yolo26n_det QNN model to HTP and execute inference to predict bounding boxes and classes of objects in an image. The yolo26n_det.py file corresponds to the qai_appbuilder version. Additionally, we provide both the ultralytics version and onnxruntime version scripts. The ultralytics version script can be used to export an .onnx model. + +## How to get the QNN format model +### 1. Export .onnx format model using ultralytics version script +```bash +python ultralytics_version.py +``` + +### 2. Set QNN related Environment Variables: +```bash +export QNN_SDK_ROOT= +export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/aarch64-oe-linux-gcc11.2:$LD_LIBRARY_PATH +export PYTHONPATH=${QNN_SDK_ROOT}/lib/python +export PATH=${QNN_SDK_ROOT}/bin/x86_64-linux-clang/:$PATH +export CPLUS_INCLUDE_PATH=/usr/include/c++/9:/usr/include/x86_64-linux-gnu/c++/9 +export LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/9 +```

### 3. 
Model Conversion +```bash +$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-onnx-converter \ + -i ./yolo26n.onnx \ + --preserve_io layout \ + --preserve_io datatype \ + --output_path ./yolo26n.cpp +``` + +### 4. Model Lib Generation +```bash +$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-model-lib-generator \ + -c ./yolo26n.cpp \ + -b ./yolo26n.bin \ + -t aarch64-oe-linux-gcc11.2 \ + -o ./ +``` +At this stage, a .so model library is generated. This library can be used on both the QNN CPU and HTP backends. + +### 5. Context Binary Generation +```bash +$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --model ./x86_64-linux-clang/libyolo26n.so \ + --soc_model 77 \ + --backend ${QNN_SDK_ROOT}/lib/x86_64-linux-clang/libQnnHtp.so \ + --binary_file cntx_yolo26n_soc77_fp16 +``` +At this stage, a context binary (.bin) is generated, which can only be used on the HTP backend. + +For more information, please refer to the QAIRT SDK documentation. + +## Notes +- When running the `yolo26n_det/yolo26n_det.py` script, make sure to replace the `model_path` with your own model path generated in the previous step. 
+ +## References +[1] https://docs.qualcomm.com/nav/home/general_tools.html?product=1601111740009302 + +[2] https://docs.ultralytics.com/models/yolo26/ \ No newline at end of file diff --git a/samples/linux/python/yolo26n_det/input.jpg b/samples/linux/python/yolo26n_det/input.jpg new file mode 100644 index 0000000..b43e311 Binary files /dev/null and b/samples/linux/python/yolo26n_det/input.jpg differ diff --git a/samples/linux/python/yolo26n_det/onnxruntime_version.py b/samples/linux/python/yolo26n_det/onnxruntime_version.py new file mode 100644 index 0000000..11bd94a --- /dev/null +++ b/samples/linux/python/yolo26n_det/onnxruntime_version.py @@ -0,0 +1,184 @@ +import argparse + +import cv2 +import numpy as np +import onnxruntime as ort + + +IMAGE_SIZE = 640 # model expects [1,3,640,640] + +# define class type (COCO 80) +class_map = { + 0: "person", + 1: "bicycle", + 2: "car", + 3: "motorcycle", + 4: "airplane", + 5: "bus", + 6: "train", + 7: "truck", + 8: "boat", + 9: "traffic light", + 10: "fire hydrant", + 11: "stop sign", + 12: "parking meter", + 13: "bench", + 14: "bird", + 15: "cat", + 16: "dog", + 17: "horse", + 18: "sheep", + 19: "cow", + 20: "elephant", + 21: "bear", + 22: "zebra", + 23: "giraffe", + 24: "backpack", + 25: "umbrella", + 26: "handbag", + 27: "tie", + 28: "suitcase", + 29: "frisbee", + 30: "skis", + 31: "snowboard", + 32: "sports ball", + 33: "kite", + 34: "baseball bat", + 35: "baseball glove", + 36: "skateboard", + 37: "surfboard", + 38: "tennis racket", + 39: "bottle", + 40: "wine glass", + 41: "cup", + 42: "fork", + 43: "knife", + 44: "spoon", + 45: "bowl", + 46: "banana", + 47: "apple", + 48: "sandwich", + 49: "orange", + 50: "broccoli", + 51: "carrot", + 52: "hot dog", + 53: "pizza", + 54: "donut", + 55: "cake", + 56: "chair", + 57: "couch", + 58: "potted plant", + 59: "bed", + 60: "dining table", + 61: "toilet", + 62: "tv", + 63: "laptop", + 64: "mouse", + 65: "remote", + 66: "keyboard", + 67: "cell phone", + 68: "microwave", + 69: 
"oven", + 70: "toaster", + 71: "sink", + 72: "refrigerator", + 73: "book", + 74: "clock", + 75: "vase", + 76: "scissors", + 77: "teddy bear", + 78: "hair drier", + 79: "toothbrush" +} + + +def _select_providers() -> list[str]: + available = ort.get_available_providers() + preferred = ["CUDAExecutionProvider", "CPUExecutionProvider"] + return [p for p in preferred if p in available] or available + + +def _load_image(image_path: str) -> tuple[np.ndarray, np.ndarray]: + img0 = cv2.imread(image_path) + if img0 is None: + raise FileNotFoundError(f"Failed to read image: {image_path}") + + img = cv2.resize(img0, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_LINEAR) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.astype(np.float32) / 255.0 + img = np.transpose(img, (2, 0, 1))[None] # NCHW + return img0, img + + +def _draw_xyxy( + frame_bgr: np.ndarray, + xyxy_640: np.ndarray, + score: float, + class_id: int, + color: tuple[int, int, int] = (0, 255, 0), + thickness: int = 2, +) -> None: + h, w = frame_bgr.shape[:2] + scale_x = w / IMAGE_SIZE + scale_y = h / IMAGE_SIZE + + x1, y1, x2, y2 = xyxy_640.tolist() + x1 = int(max(0.0, min(IMAGE_SIZE, x1)) * scale_x) + y1 = int(max(0.0, min(IMAGE_SIZE, y1)) * scale_y) + x2 = int(max(0.0, min(IMAGE_SIZE, x2)) * scale_x) + y2 = int(max(0.0, min(IMAGE_SIZE, y2)) * scale_y) + + cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), color, thickness) + class_name = class_map.get(class_id, "Unknown") + label = f"{score:.2f} {class_name}" + cv2.putText( + frame_bgr, + label, + (x1, max(0, y1 - 8)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + color, + 2, + cv2.LINE_AA, + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description="YOLO26 ONNXRuntime inference (no NMS).") + parser.add_argument("--model", default="yolo26n.onnx", help="Path to YOLO26 ONNX model") + parser.add_argument("--image", default="input.jpg", help="Path to input image") + parser.add_argument("--output", default="output.jpg", help="Path to save visualization") + 
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold") + args = parser.parse_args() + + providers = _select_providers() + try: + sess = ort.InferenceSession(args.model, providers=providers) + except Exception: + sess = ort.InferenceSession(args.model, providers=["CPUExecutionProvider"]) + providers = ["CPUExecutionProvider"] + + input_name = sess.get_inputs()[0].name + + img0, inp = _load_image(args.image) + (out,) = sess.run(None, {input_name: inp}) + + det = np.asarray(out, dtype=np.float32)[0] # [300,6] + scores = det[:, 4] + keep = scores >= float(args.conf) + det = det[keep] + det = det[np.argsort(-det[:, 4])] + + for x1, y1, x2, y2, score, cls in det: + _draw_xyxy(img0, np.array([x1, y1, x2, y2], dtype=np.float32), float(score), int(cls)) + + if not cv2.imwrite(args.output, img0): + raise RuntimeError(f"Failed to write image: {args.output}") + + print(f"onnxruntime={ort.__version__} providers={providers}") + print(f"detections={len(det)} saved={args.output}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/samples/linux/python/yolo26n_det/ultralytics_version.py b/samples/linux/python/yolo26n_det/ultralytics_version.py new file mode 100644 index 0000000..ddb075c --- /dev/null +++ b/samples/linux/python/yolo26n_det/ultralytics_version.py @@ -0,0 +1,13 @@ +from ultralytics import YOLO + +# Load the YOLO26 model +model = YOLO("yolo26n.pt or your .pt model path") + +# Export the model to ONNX format +model.export(format="onnx") # creates 'yolo26n.onnx' + +# Load the exported ONNX model +onnx_model = YOLO("your .onnx model path") + +# Run inference +results = onnx_model("your input image") \ No newline at end of file diff --git a/samples/linux/python/yolo26n_det/yolo26n_det.py b/samples/linux/python/yolo26n_det/yolo26n_det.py new file mode 100644 index 0000000..e1f358a --- /dev/null +++ b/samples/linux/python/yolo26n_det/yolo26n_det.py @@ -0,0 +1,326 @@ +# 
--------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +import sys +import os +sys.path.append(".") +sys.path.append("python") +sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")) + +import utils.install as install +import cv2 +import numpy as np +import torch +import torchvision.transforms as transforms +from PIL import Image +from PIL.Image import fromarray as ImageFromArray +from torch.nn.functional import interpolate, pad +from torchvision.ops import nms +from typing import List, Tuple, Optional, Union, Callable +import argparse + +from qai_appbuilder import (QNNContext, Runtime, LogLevel, ProfilingLevel, PerfProfile, QNNConfig) + +#################################################################### + +IMAGE_SIZE = 640 + +#################################################################### + +execution_ws=os.path.dirname(os.path.abspath(__file__)) +print(f"Current file directory: {execution_ws}") +qnn_sdk_root = os.environ.get("QNN_SDK_ROOT") +if not qnn_sdk_root: + print("Error: QNN_SDK_ROOT environment variable is not set.") + sys.exit(1) + +qnn_dir = os.path.join(qnn_sdk_root, "lib/aarch64-oe-linux-gcc11.2") + +model_path = "" + +#################################################################### + +SOC_ID = None +cleaned_argv = [] +i = 0 +while i < len(sys.argv): + if sys.argv[i] == '--chipset': + SOC_ID = sys.argv[i + 1] + i += 2 + else: + cleaned_argv.append(sys.argv[i]) + i += 1 + +sys.argv = cleaned_argv + +print(f"SOC_ID: {SOC_ID}") + +yolo26n = None + +# define class type +class_map = { + 0: "person", + 1: "bicycle", + 2: "car", + 3: "motorcycle", + 4: "airplane", + 5: "bus", + 6: "train", + 7: "truck", + 8: "boat", + 9: "traffic light", + 10: "fire hydrant", + 11: "stop sign", + 12: "parking meter", + 13: "bench", + 
14: "bird", + 15: "cat", + 16: "dog", + 17: "horse", + 18: "sheep", + 19: "cow", + 20: "elephant", + 21: "bear", + 22: "zebra", + 23: "giraffe", + 24: "backpack", + 25: "umbrella", + 26: "handbag", + 27: "tie", + 28: "suitcase", + 29: "frisbee", + 30: "skis", + 31: "snowboard", + 32: "sports ball", + 33: "kite", + 34: "baseball bat", + 35: "baseball glove", + 36: "skateboard", + 37: "surfboard", + 38: "tennis racket", + 39: "bottle", + 40: "wine glass", + 41: "cup", + 42: "fork", + 43: "knife", + 44: "spoon", + 45: "bowl", + 46: "banana", + 47: "apple", + 48: "sandwich", + 49: "orange", + 50: "broccoli", + 51: "carrot", + 52: "hot dog", + 53: "pizza", + 54: "donut", + 55: "cake", + 56: "chair", + 57: "couch", + 58: "potted plant", + 59: "bed", + 60: "dining table", + 61: "toilet", + 62: "tv", + 63: "laptop", + 64: "mouse", + 65: "remote", + 66: "keyboard", + 67: "cell phone", + 68: "microwave", + 69: "oven", + 70: "toaster", + 71: "sink", + 72: "refrigerator", + 73: "book", + 74: "clock", + 75: "vase", + 76: "scissors", + 77: "teddy bear", + 78: "hair drier", + 79: "toothbrush" +} + +def preprocess_PIL_image(image: Image) -> torch.Tensor: + """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW.""" + transform = transforms.Compose([transforms.PILToTensor()]) # bgr image + img: torch.Tensor = transform(image) # type: ignore + img = img.float().unsqueeze(0) / 255.0 # int 0 - 255 to float 0.0 - 1.0 + return img + +def torch_tensor_to_PIL_image(data: torch.Tensor) -> Image: + """ + Convert a Torch tensor (dtype float32) with range [0, 1] and shape CHW into PIL image CHW + """ + out = torch.clip(data, min=0.0, max=1.0) + np_out = (out.permute(1, 2, 0).detach().numpy() * 255).astype(np.uint8) + return ImageFromArray(np_out) + +def draw_box_from_xyxy( + frame: np.ndarray, + top_left: np.ndarray | torch.Tensor | Tuple[int, int], + bottom_right: np.ndarray | torch.Tensor | Tuple[int, int], + color: Tuple[int, int, int] = (0, 0, 0), + size: int = 3, + 
text: Optional[str] = None, +): + """ + Draw a box using the provided top left / bottom right points to compute the box. + + Parameters: + frame: np.ndarray + np array (H W C x uint8, BGR) + + box: np.ndarray | torch.Tensor + array (4), where layout is + [xc, yc, h, w] + + color: Tuple[int, int, int] + Color of drawn points and connection lines (RGB) + + size: int + Size of drawn points and connection lines BGR channel layout + + text: None | str + Overlay text at the top of the box. + + Returns: + None; modifies frame in place. + """ + original_height, original_width = frame.shape[:2] + scale_x = original_width / IMAGE_SIZE + scale_y = original_height / IMAGE_SIZE + if not isinstance(top_left, tuple): + top_left = (int(top_left[0].item()*scale_x), int(top_left[1].item()*scale_y)) + if not isinstance(bottom_right, tuple): + bottom_right = (int(bottom_right[0].item()*scale_x), int(bottom_right[1].item()*scale_y)) + + cv2.rectangle(frame, top_left, bottom_right, color, size) + if text is not None: + cv2.putText( + frame, + text, + (top_left[0], top_left[1] - 10), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + color, + size, + ) + +# Yolo26 class which inherited from the class QNNContext. +class Yolo26(QNNContext): + def Inference(self, input_data): + input_datas=[input_data] + output_data = super().Inference(input_datas) + return output_data + + +def Init(): + global yolo26 + + # Config AppBuilder environment. + QNNConfig.Config(qnn_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC) + + # Instance for Yolo26 objects. + yolo26 = Yolo26("yolo26", model_path) + + + +def Inference(input_image_path, output_image_path, show_image=True, confidence: float = 0.60): + global image_buffer + + # Read and preprocess the image. 
+ image = Image.open(input_image_path) + image = image.resize((IMAGE_SIZE, IMAGE_SIZE)) + outputImg = Image.open(input_image_path) + image = preprocess_PIL_image(image) # transfer raw image to torch tensor format + # image = image.permute(0, 2, 3, 1) + image = image.numpy() + + output_image = np.array(outputImg.convert("RGB")) # transfer to numpy array + + # Burst the HTP. + PerfProfile.SetPerfProfileGlobal(PerfProfile.BURST) + + # Run the inference. + model_output = yolo26.Inference(image) + + pred_boxes = torch.tensor(model_output[0][:, :, :4]) + pred_scores = torch.tensor(model_output[0][:, :, 4]) + pred_class_idx = torch.tensor(model_output[0][:, :, 5]) + + # Reset the HTP. + PerfProfile.RelPerfProfileGlobal() + + # Add boxes to each batch + for batch_idx in range(len(pred_boxes)): + pred_boxes_batch = pred_boxes[batch_idx] + pred_scores_batch = pred_scores[batch_idx] + pred_class_idx_batch = pred_class_idx[batch_idx] + + keep_mask = pred_scores_batch >= confidence + if not torch.any(keep_mask): + continue + + pred_boxes_batch = pred_boxes_batch[keep_mask] + pred_scores_batch = pred_scores_batch[keep_mask] + pred_class_idx_batch = pred_class_idx_batch[keep_mask] + + for box, score, class_idx in zip(pred_boxes_batch, pred_scores_batch, pred_class_idx_batch): + class_idx_item = round(class_idx.item()) + class_name = class_map.get(class_idx_item, "Unknown") + draw_box_from_xyxy( + output_image, + box[0:2].int(), + box[2:4].int(), + color=(0, 255, 0), + size=2, + text=f'{score.item():.2f} {class_name}' + ) + + #save and display the output_image + output_image = Image.fromarray(output_image) + output_image.save(output_image_path) + + if show_image: + output_image.show() + +def Release(): + global yolo26 + + # Release the resources. 
+ del(yolo26) + + +def main(input_image_path=None, output_image_path=None, show_image=False, confidence: float = 0.60): + + if input_image_path is None: + input_image_path = os.path.join(execution_ws, "input.jpg") + + if output_image_path is None: + output_image_path = os.path.join(execution_ws, "output.jpg") + + Init() + + Inference( + input_image_path=input_image_path, + output_image_path=output_image_path, + show_image=show_image, + confidence=confidence, + ) + + Release() + + return "Yolo26 Inference Result" + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Process a single image path.") + parser.add_argument('--input_image_path', help='Path to the input image', default=None) + #input_image_path, output_image_path + parser.add_argument('--output_image_path', help='Path to the output image', default=None) + parser.add_argument('--confidence', help='Score threshold for keeping boxes', type=float, default=0.60) + args = parser.parse_args() + + main(args.input_image_path, args.output_image_path, confidence=args.confidence)