diff --git a/neural-networks/README.md b/neural-networks/README.md index e37dd16ff..ee0fc1094 100644 --- a/neural-networks/README.md +++ b/neural-networks/README.md @@ -49,26 +49,27 @@ LEGEND: ✅: available; ❌: not available; 🚧: work in progress | :---------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------: | | yolov6-nano | yolop | -| Name | HubAI Model | RVC2 | RVC4 (peripheral) | RVC4 (standalone) | DepthAIv2 | Notes | -| -------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ----------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | -| [generic-example](generic-example/) | [mobilenet-ssd](https://models.luxonis.com/luxonis/mobilenet-ssd/2da6e0a5-4785-488d-8cf5-c35f7ec1a1ed) | ✅ | ❌ | ❌ | [gen2-efficientDet](https://github.com/luxonis/oak-examples/tree/master/gen2-efficientDet) | Model not available for RVC4. | -| [generic-example](generic-example/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) / [yolov6-large](https://models.luxonis.com/luxonis/yolov6-large/7937248a-c310-4765-97db-0850086f2dd9) / [yolov10-nano](https://models.luxonis.com/luxonis/yolov10-nano/03153a9a-06f7-4ce9-b655-3762d21d0a8a) | ✅ | ✅ | ✅ | [gen2-yolo](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo) | | -| [generic-example](generic-example/) | [qrdet](https://models.luxonis.com/luxonis/qrdet/d1183a0f-e9a0-4fa2-8437-f2f5b0181739) | ✅ | ✅ | ✅ | [gen2-qr-code-scanner](https://github.com/luxonis/oak-examples/tree/master/gen2-qr-code-scanner) | | -| [generic-example](generic-example/) | [barcode-detection](https://models.luxonis.com/luxonis/barcode-detection/75edea0f-79c9-4091-a48c-f81424b3ccab) | ✅ | ✅ | ✅ | | | -| [generic-example](generic-example/) | [scrfd-person-detection](https://models.luxonis.com/luxonis/scrfd-person-detection/c3830468-3178-4de6-bc09-0543bbe28b1c) | ✅ | ✅ | ✅ | | | -| [generic-example](generic-example/) | [mediapipe-palm-detection](https://models.luxonis.com/luxonis/mediapipe-palm-detection/9531aba9-ef45-4ad3-ae03-808387d61bf3) | ✅ | ✅ | ✅ | [gen2-palm-detection](https://github.com/luxonis/oak-examples/tree/master/gen2-palm-detection) | | -| [generic-example](generic-example/) | [ppe-detection](https://models.luxonis.com/luxonis/ppe-detection/fd8699bf-3819-4134-9374-3735b9660d3c) | ✅ | ✅ | ✅ | | | -| [generic-example](generic-example/) | [paddle-text-detection](https://models.luxonis.com/luxonis/paddle-text-detection/131d855c-60b1-4634-a14d-1269bb35dcd2) | ✅ | ✅ | ✅ | | | -| [human-machine-safety](object-detection/human-machine-safety/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034), [mediapipe-palm-detection](https://models.luxonis.com/luxonis/mediapipe-palm-detection/9531aba9-ef45-4ad3-ae03-808387d61bf3) | ✅ | ✅ | ✅ | 
[gen2-human-machine-safety](https://github.com/luxonis/oak-examples/tree/master/gen2-human-machine-safety) | Needs a device with color and stereo (left, right) cameras. | -| [spatial-detections](object-detection/spatial-detections/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) / [yolov6-large](https://models.luxonis.com/luxonis/yolov6-large/7937248a-c310-4765-97db-0850086f2dd9) / [yolov10-nano](https://models.luxonis.com/luxonis/yolov10-nano/03153a9a-06f7-4ce9-b655-3762d21d0a8a) | ✅ | ✅ | ✅ | | Needs a device with color and stereo (left, right) cameras. | -| [social-distancing](object-detection/social-distancing/) | [scrfd-person-detection](https://models.luxonis.com/luxonis/scrfd-person-detection/c3830468-3178-4de6-bc09-0543bbe28b1c) | ✅ | ✅ | ✅ | [gen2-social-distancing](https://github.com/luxonis/oak-examples/tree/master/gen2-social-distancing) | Needs a device with color and stereo (left, right) cameras. | -| [generic-example](generic-example/) | [fire-detection](https://models.luxonis.com/luxonis/fire-detection/899c54a8-9e62-4c99-843d-a51751949a7e) | ✅ | ✅ | ✅ | [gen2-fire-detection](https://github.com/luxonis/oak-examples/tree/master/gen2-fire-detection) | | -| [generic-example](generic-example/) | [mobile-object-localizer](https://models.luxonis.com/luxonis/mobile-object-localizer/d7187509-c6c5-4979-9f59-af885011ad2e) | ✅ | ✅ | ✅ | [gen2-mobile-object-localizer](https://github.com/luxonis/oak-examples/tree/master/gen2-mobile-object-localizer) | | -| [text-blur](object-detection/text-blur/) | [paddle-text-detection](https://models.luxonis.com/luxonis/paddle-text-detection/131d855c-60b1-4634-a14d-1269bb35dcd2) | ✅ | ✅ | ✅ | [gen2-text-blur](https://github.com/luxonis/oak-examples/tree/master/gen2-text-blur) | | -| [thermal-detection](object-detection/thermal-detection/) | [thermal-person-detection](https://models.luxonis.com/luxonis/thermal-person-detection/b1d7a62f-7020-469c-8fa9-a6d1ff3499b2) | ✅ | ❌ | ❌ | | Only available for OAK Thermal(RVC2). | -| [yolo-host-decoding](object-detection/yolo-host-decoding/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) | ✅ | ✅ | ✅ | [gen2-yolo/host-decoding](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo/host-decoding) | | -| [yolo-p](object-detection/yolo-p/) | [yolo-p](https://models.luxonis.com/luxonis/yolo-p/0a22d194-d525-46e7-a785-a267b7958a39) | ✅ | ✅ | ✅ | [gen2-yolo/yolop](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo/yolop), [gen2-road-segmentation](https://github.com/luxonis/oak-examples/tree/master/gen2-road-segmentation) | | -| [yolo-world](object-detection/yolo-world/) | [yolo-world-l](https://models.luxonis.com/luxonis/yolo-world-l/6684e96f-11fc-4d92-8657-12a5fd8e532a) | ❌ | ✅ | ✅ | | Model is only available for RVC4. 
| +| Name | HubAI Model | RVC2 | RVC4 (peripheral) | RVC4 (standalone) | DepthAIv2 | Notes | +| ------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ----------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | +| [generic-example](generic-example/) | [mobilenet-ssd](https://models.luxonis.com/luxonis/mobilenet-ssd/2da6e0a5-4785-488d-8cf5-c35f7ec1a1ed) | ✅ | ❌ | ❌ | [gen2-efficientDet](https://github.com/luxonis/oak-examples/tree/master/gen2-efficientDet) | Model not available for RVC4. | +| [generic-example](generic-example/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) / [yolov6-large](https://models.luxonis.com/luxonis/yolov6-large/7937248a-c310-4765-97db-0850086f2dd9) / [yolov10-nano](https://models.luxonis.com/luxonis/yolov10-nano/03153a9a-06f7-4ce9-b655-3762d21d0a8a) | ✅ | ✅ | ✅ | [gen2-yolo](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo) | | +| [generic-example](generic-example/) | [qrdet](https://models.luxonis.com/luxonis/qrdet/d1183a0f-e9a0-4fa2-8437-f2f5b0181739) | ✅ | ✅ | ✅ | [gen2-qr-code-scanner](https://github.com/luxonis/oak-examples/tree/master/gen2-qr-code-scanner) | | +| [generic-example](generic-example/) | [barcode-detection](https://models.luxonis.com/luxonis/barcode-detection/75edea0f-79c9-4091-a48c-f81424b3ccab) | ✅ | ✅ | ✅ | | | +| [generic-example](generic-example/) | [scrfd-person-detection](https://models.luxonis.com/luxonis/scrfd-person-detection/c3830468-3178-4de6-bc09-0543bbe28b1c) | ✅ | ✅ | ✅ | | | +| [generic-example](generic-example/) | [mediapipe-palm-detection](https://models.luxonis.com/luxonis/mediapipe-palm-detection/9531aba9-ef45-4ad3-ae03-808387d61bf3) | ✅ | ✅ | ✅ | [gen2-palm-detection](https://github.com/luxonis/oak-examples/tree/master/gen2-palm-detection) | | +| [generic-example](generic-example/) | [ppe-detection](https://models.luxonis.com/luxonis/ppe-detection/fd8699bf-3819-4134-9374-3735b9660d3c) | ✅ | ✅ | ✅ | | | +| [generic-example](generic-example/) | [paddle-text-detection](https://models.luxonis.com/luxonis/paddle-text-detection/131d855c-60b1-4634-a14d-1269bb35dcd2) | ✅ | ✅ | ✅ | | | +| [human-machine-safety](object-detection/human-machine-safety/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034), [mediapipe-palm-detection](https://models.luxonis.com/luxonis/mediapipe-palm-detection/9531aba9-ef45-4ad3-ae03-808387d61bf3) | ✅ | ✅ | ✅ | [gen2-human-machine-safety](https://github.com/luxonis/oak-examples/tree/master/gen2-human-machine-safety) | Needs a device with color and stereo (left, right) cameras. 
| +| [spatial-detections](object-detection/spatial-detections/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) / [yolov6-large](https://models.luxonis.com/luxonis/yolov6-large/7937248a-c310-4765-97db-0850086f2dd9) / [yolov10-nano](https://models.luxonis.com/luxonis/yolov10-nano/03153a9a-06f7-4ce9-b655-3762d21d0a8a) | ✅ | ✅ | ✅ | | Needs a device with color and stereo (left, right) cameras. | +| [social-distancing](object-detection/social-distancing/) | [scrfd-person-detection](https://models.luxonis.com/luxonis/scrfd-person-detection/c3830468-3178-4de6-bc09-0543bbe28b1c) | ✅ | ✅ | ✅ | [gen2-social-distancing](https://github.com/luxonis/oak-examples/tree/master/gen2-social-distancing) | Needs a device with color and stereo (left, right) cameras. | +| [generic-example](generic-example/) | [fire-detection](https://models.luxonis.com/luxonis/fire-detection/899c54a8-9e62-4c99-843d-a51751949a7e) | ✅ | ✅ | ✅ | [gen2-fire-detection](https://github.com/luxonis/oak-examples/tree/master/gen2-fire-detection) | | +| [generic-example](generic-example/) | [mobile-object-localizer](https://models.luxonis.com/luxonis/mobile-object-localizer/d7187509-c6c5-4979-9f59-af885011ad2e) | ✅ | ✅ | ✅ | [gen2-mobile-object-localizer](https://github.com/luxonis/oak-examples/tree/master/gen2-mobile-object-localizer) | | +| [text-blur](object-detection/text-blur/) | [paddle-text-detection](https://models.luxonis.com/luxonis/paddle-text-detection/131d855c-60b1-4634-a14d-1269bb35dcd2) | ✅ | ✅ | ✅ | [gen2-text-blur](https://github.com/luxonis/oak-examples/tree/master/gen2-text-blur) | | +| [thermal-detection](object-detection/thermal-detection/) | [thermal-person-detection](https://models.luxonis.com/luxonis/thermal-person-detection/b1d7a62f-7020-469c-8fa9-a6d1ff3499b2) | ✅ | ❌ | ❌ | | Only available for OAK Thermal(RVC2). | +| [yolo-host-decoding](object-detection/yolo-host-decoding/) | [yolov6-nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) | ✅ | ✅ | ✅ | [gen2-yolo/host-decoding](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo/host-decoding) | | +| [yolo-p](object-detection/yolo-p/) | [yolo-p](https://models.luxonis.com/luxonis/yolo-p/0a22d194-d525-46e7-a785-a267b7958a39) | ✅ | ✅ | ✅ | [gen2-yolo/yolop](https://github.com/luxonis/oak-examples/tree/master/gen2-yolo/yolop), [gen2-road-segmentation](https://github.com/luxonis/oak-examples/tree/master/gen2-road-segmentation) | | +| [yolo-world](object-detection/yolo-world/) | [yolo-world-l](https://models.luxonis.com/luxonis/yolo-world-l/6684e96f-11fc-4d92-8657-12a5fd8e532a) | ❌ | ✅ | ✅ | | Model is only available for RVC4. | +| [barcode-detection-conveyor-belt](object-detection/barcode-detection-conveyor-belt/) | [barcode-detection](https://models.luxonis.com/luxonis/barcode-detection/75edea0f-79c9-4091-a48c-f81424b3ccab) | ✅ | ✅ | ✅ | | Most suited for OAK4-CS | ## Face Detection diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/README.md b/neural-networks/object-detection/barcode-detection-conveyor-belt/README.md new file mode 100644 index 000000000..04bd1cd1b --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/README.md @@ -0,0 +1,111 @@ +# Barcode Detection on Conveyor Belt + +This example demonstrates how to detect and decode barcodes in real-time using computer vision. The application is designed for conveyor belt applications where barcodes need to be detected and decoded from video streams. 
It uses a [barcode detection model](https://models.luxonis.com/luxonis/barcode-detection/75edea0f-79c9-4091-a48c-f81424b3ccab) to locate barcode regions and decodes them on the host with [pyzbar](https://pypi.org/project/pyzbar/).
+
+The pipeline processes high-resolution camera input, crops each detected barcode region, and retries failed decodes on rotated and color-inverted copies of the crop to maximize the recognition success rate.
+
+## ⚠️ Important Notice
+
+**This application will work poorly on fixed-focus devices.** The barcode detection and decoding algorithms require clear, focused images to work effectively. Fixed-focus cameras may struggle to capture sharp barcode images at varying distances, leading to:
+
+- Reduced detection accuracy
+- Failed barcode decoding attempts
+- Inconsistent performance
+
+For optimal results, use a device with autofocus or make sure barcodes pass at the camera's fixed focal distance.
+
+## Recommended Devices
+
+We recommend using the **OAK4-CS** for this example. Its **global-shutter** color sensor is best suited for fast-moving conveyor belts, reducing the motion blur and rolling-shutter artifacts that can cause missed or incorrect decodes.
+
+The application will also run on **OAK4-S** and **OAK4-D** devices. For rolling-shutter or fixed-focus variants, keep barcodes near the best-focus distance and ensure good lighting to maximize decoding reliability. See the notice above regarding fixed-focus cameras.
+
+## Demo
+
+![Demo](media/barcode_demo.gif)
+
+> **Note:** The stream may appear purplish because the OAK4-CS lacks an IR-cut filter.
+
+## Usage
+
+Running this example requires a **Luxonis device** connected to your computer. Refer to the [documentation](https://docs.luxonis.com/software-v3/) to set up your device if you haven't done so already.
+
+You can run the example fully on the device ([`STANDALONE` mode](#standalone-mode-rvc4-only)) or use your computer as the host ([`PERIPHERAL` mode](#peripheral-mode)).
+
+Here is a list of all available parameters:
+
+```
+-d DEVICE, --device DEVICE
+                    Optional name, DeviceID or IP of the camera to connect to. (default: None)
+-fps FPS_LIMIT, --fps_limit FPS_LIMIT
+                    FPS limit for the model runtime. (default: 10 for RVC2, 30 for RVC4)
+-media MEDIA_PATH, --media_path MEDIA_PATH
+                    Optional path to a video file to process instead of the live camera feed. (default: None)
+```
+
+## Peripheral Mode
+
+### Installation
+
+Install the system libraries required by `pyzbar`:
+
+**Ubuntu:**
+
+```bash
+sudo apt-get update && sudo apt-get install -y libzbar0 libzbar-dev
+```
+
+**macOS:**
+
+```bash
+brew install zbar
+```
+
+You then need a **Python 3.10** environment with the following packages installed:
+
+- [DepthAI](https://pypi.org/project/depthai/),
+- [DepthAI Nodes](https://pypi.org/project/depthai-nodes/).
+
+You can install them by running:
+
+```bash
+pip install -r requirements.txt
+```
+
+Peripheral mode uses your computer as the host; the device-to-host communication can affect the overall speed of the app. Below are some examples of how to run the example.
+
+### Examples
+
+Start the demo:
+
+```bash
+python3 main.py
+```
+
+This will run the example with default arguments.
+
+```bash
+python3 main.py --device 192.168.1.100 --fps_limit 15
+```
+
+This will connect to the device at the given IP and set the FPS limit to 15.
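+
+The flags can also be combined. For example, the following replays a video file at 5 FPS (the file name is just a placeholder):
+
+```bash
+python3 main.py --media_path test_video.mp4 --fps_limit 5
+```
+
+When `--media_path` is set, the FPS limit is applied to the video replay rather than to the camera.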
+
+```bash
+python3 main.py --media_path test_video.mp4
+```
+
+This will process a video file instead of the live camera feed.
+
+## Standalone Mode (RVC4 only)
+
+In standalone mode, the app runs entirely on the device.
+To run the example in this mode, first install the `oakctl` tool using the installation instructions [here](https://docs.luxonis.com/software-v3/oak-apps/oakctl).
+
+The app can then be run with:
+
+```bash
+oakctl connect
+oakctl app run .
+```
+
+This will run the example with default argument values. If you want to change these values, you need to edit the `oakapp.toml` file (refer [here](https://docs.luxonis.com/software-v3/oak-apps/configuration/) for more information about this configuration file).
diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC2.yaml b/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC2.yaml
new file mode 100644
index 000000000..416772f42
--- /dev/null
+++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC2.yaml
@@ -0,0 +1,2 @@
+model: luxonis/barcode-detection:768x576
+platform: RVC2
\ No newline at end of file
diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC4.yaml b/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC4.yaml
new file mode 100644
index 000000000..46a1e1dd0
--- /dev/null
+++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/depthai_models/barcode-detection.RVC4.yaml
@@ -0,0 +1,2 @@
+model: luxonis/barcode-detection:768x576
+platform: RVC4
\ No newline at end of file
diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/main.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/main.py
new file mode 100644
index 000000000..91f80128e
--- /dev/null
+++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/main.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+import depthai as dai
+from depthai_nodes.node import ParsingNeuralNetwork
+
+from utils.arguments import initialize_argparser
+from utils.simple_barcode_overlay import SimpleBarcodeOverlay
+from utils.barcode_decoder import BarcodeDecoder
+from utils.host_crop_config_creator import CropConfigsCreator
+
+_, args = initialize_argparser()
+
+visualizer = dai.RemoteConnection(httpPort=8082)
+device = dai.Device(dai.DeviceInfo(args.device)) if args.device else dai.Device()
+platform = device.getPlatform().name
+print(f"Platform: {platform}")
+
+frame_type = (
+    dai.ImgFrame.Type.BGR888i if platform == "RVC4" else dai.ImgFrame.Type.BGR888p
+)
+
+if not args.fps_limit:
+    args.fps_limit = 10 if platform == "RVC2" else 30
+    print(
+        f"\nFPS limit set to {args.fps_limit} for {platform} platform. 
If you want to set a custom FPS limit, use the --fps_limit flag.\n" + ) + +with dai.Pipeline(device) as pipeline: + print("Creating pipeline...") + + model_description = dai.NNModelDescription.fromYamlFile( + f"barcode-detection.{platform}.yaml" + ) + nn_archive = dai.NNArchive( + dai.getModelFromZoo( + model_description, + ) + ) + + if args.media_path: + replay = pipeline.create(dai.node.ReplayVideo) + replay.setReplayVideoFile(Path(args.media_path)) + replay.setOutFrameType(frame_type) + replay.setLoop(True) + if args.fps_limit: + replay.setFps(args.fps_limit) + else: + cam = pipeline.create(dai.node.Camera).build() + + cam_out = cam.requestOutput((2592, 1944), frame_type, fps=args.fps_limit) + input_node = replay.out if args.media_path else cam_out + + resize_node = pipeline.create(dai.node.ImageManip) + resize_node.setMaxOutputFrameSize( + nn_archive.getInputWidth() * nn_archive.getInputHeight() * 3 + ) + resize_node.initialConfig.setOutputSize( + nn_archive.getInputWidth(), + nn_archive.getInputHeight(), + mode=dai.ImageManipConfig.ResizeMode.STRETCH, + ) + resize_node.initialConfig.setFrameType(frame_type) + input_node.link(resize_node.inputImage) + + detection_nn: ParsingNeuralNetwork = pipeline.create(ParsingNeuralNetwork).build( + resize_node.out, nn_archive + ) + + crop_code = pipeline.create(CropConfigsCreator).build( + detection_nn.out, + source_size=(2592, 1944), + target_size=(640, 480), + resize_mode=dai.ImageManipConfig.ResizeMode.LETTERBOX, + ) + + crop_manip = pipeline.create(dai.node.ImageManip) + crop_manip.inputConfig.setReusePreviousMessage(False) + crop_manip.setMaxOutputFrameSize(640 * 480 * 5) + input_node.link(crop_manip.inputImage) + crop_code.config_output.link(crop_manip.inputConfig) + + decoder = pipeline.create(BarcodeDecoder) + crop_manip.out.link(decoder.input) + + barcode_overlay = pipeline.create(SimpleBarcodeOverlay).build( + decoder.output, resize_node.out, detection_nn.out + ) + + visualizer.addTopic("Barcode Overlay", barcode_overlay.output) + + pipeline.run() + + while True: + key = visualizer.waitKey(1) + if key == ord("q"): + break diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/media/barcode_demo.gif b/neural-networks/object-detection/barcode-detection-conveyor-belt/media/barcode_demo.gif new file mode 100644 index 000000000..14a5a8f3b Binary files /dev/null and b/neural-networks/object-detection/barcode-detection-conveyor-belt/media/barcode_demo.gif differ diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/oakapp.toml b/neural-networks/object-detection/barcode-detection-conveyor-belt/oakapp.toml new file mode 100644 index 000000000..08a1c952c --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/oakapp.toml @@ -0,0 +1,17 @@ +identifier = "com.example.object-detection.conveyor-application-barcodes" +app_version = "1.0.0" + +prepare_container = [ + { type = "RUN", command = "apt-get update" }, + { type = "RUN", command = "apt-get install -y python3 python3-pip libzbar0 libzbar-dev" }, + { type = "COPY", source = "requirements.txt", target = "requirements.txt" }, + { type = "RUN", command = "pip3 install -r /app/requirements.txt --break-system-packages"}, +] + +prepare_build_container = [] + +build_steps = [] + +depthai_models = { yaml_path = "./depthai_models" } + +entrypoint = ["bash", "-c", "python3 -u /app/main.py"] \ No newline at end of file diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/requirements.txt 
b/neural-networks/object-detection/barcode-detection-conveyor-belt/requirements.txt new file mode 100644 index 000000000..445462a22 --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/requirements.txt @@ -0,0 +1,6 @@ +depthai>=3.0.0 +depthai-nodes==0.3.4 +numpy>=1.22 +opencv-python-headless~=4.10.0 +pyzbar==0.1.9 +Pillow==12.0.0 \ No newline at end of file diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/annotation_node.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/annotation_node.py new file mode 100644 index 000000000..b1a38851f --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/annotation_node.py @@ -0,0 +1,77 @@ +from typing import List +import depthai as dai + +from depthai_nodes import ImgDetectionsExtended, SECONDARY_COLOR +from depthai_nodes.utils import AnnotationHelper + + +class AnnotationNode(dai.node.ThreadedHostNode): + def __init__(self) -> None: + super().__init__() + + self.input = self.createInput() + self.input.setPossibleDatatypes([(dai.DatatypeEnum.Buffer, True)]) + + self.out = self.createOutput() + self.out.setPossibleDatatypes([(dai.DatatypeEnum.Buffer, True)]) + + def build( + self, + gather_data_msg: dai.Node.Output, + ) -> "AnnotationNode": + gather_data_msg.link(self.input) + return self + + def run(self) -> None: + while self.isRunning(): + gather_data_msg: dai.Buffer = self.input.get() + + img_detections_extended_msg: ImgDetectionsExtended = ( + gather_data_msg.reference_data + ) + + msg_group_list: List[dai.MessageGroup] = gather_data_msg.gathered + + annotations = AnnotationHelper() + + for img_detection_extended_msg, msg_group in zip( + img_detections_extended_msg.detections, msg_group_list + ): + xmin, ymin, xmax, ymax = ( + img_detection_extended_msg.rotated_rect.getOuterRect() + ) + + try: + xmin = float(xmin) + ymin = float(ymin) + xmax = float(xmax) + ymax = float(ymax) + except Exception: + pass + + xmin = max(0.0, min(1.0, xmin)) + ymin = max(0.0, min(1.0, ymin)) + xmax = max(0.0, min(1.0, xmax)) + ymax = max(0.0, min(1.0, ymax)) + + annotations.draw_rectangle((xmin, ymin), (xmax, ymax)) + + barcode_text = "" + if "0" in msg_group: + buf_msg: dai.Buffer = msg_group["0"] + barcode_text = buf_msg.getData().decode("utf-8", errors="ignore") + + if barcode_text: + annotations.draw_text( + text=barcode_text, + position=(xmin + 0.01, ymin + 0.03), + size=20, + color=SECONDARY_COLOR, + ) + + annotations_msg = annotations.build( + timestamp=img_detections_extended_msg.getTimestamp(), + sequence_num=img_detections_extended_msg.getSequenceNum(), + ) + + self.out.send(annotations_msg) diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/arguments.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/arguments.py new file mode 100644 index 000000000..4ef0b9862 --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/arguments.py @@ -0,0 +1,38 @@ +import argparse + + +def initialize_argparser(): + """Initialize the argument parser for the script.""" + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "-d", + "--device", + help="Optional name, DeviceID or IP of the camera to connect to.", + required=False, + default=None, + type=str, + ) + + parser.add_argument( + "-fps", + "--fps_limit", + help="FPS limit for the model runtime.", + required=False, + default=None, + 
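+        # If left as None, main.py falls back to a platform default (10 FPS on RVC2, 30 FPS on RVC4).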
type=int, + ) + + parser.add_argument( + "-media", + "--media_path", + help="Path to the media file you aim to run the model on. If not set, the model will run on the camera input.", + required=False, + default=None, + type=str, + ) + args = parser.parse_args() + + return parser, args diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/barcode_decoder.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/barcode_decoder.py new file mode 100644 index 000000000..00a2b2946 --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/barcode_decoder.py @@ -0,0 +1,54 @@ +import time +import depthai as dai +from PIL import Image +from pyzbar.pyzbar import decode +import cv2 + + +class BarcodeDecoder(dai.node.ThreadedHostNode): + """ + Custom host node that receives ImgFrame messages, + runs pyzbar (plus optional fallbacks), and emits raw bytes + in dai.Buffer messages. + """ + + def __init__(self): + super().__init__() + + self.input = self.createInput() + self.input.setPossibleDatatypes([(dai.DatatypeEnum.ImgFrame, True)]) + + self.output = self.createOutput() + self.output.setPossibleDatatypes([(dai.DatatypeEnum.Buffer, True)]) + + def run(self): + while self.isRunning(): + in_msg = self.input.tryGet() + if in_msg is None: + time.sleep(0.001) + continue + + cv_frame = in_msg.getCvFrame() + pil_img = Image.fromarray(cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)) + + barcodes = decode(pil_img) + + if not barcodes: + for angle in (90, 180, 270): + rotated = pil_img.rotate(angle, expand=True) + barcodes = decode(rotated) + if barcodes: + break + + if not barcodes: + inv_frame = cv2.bitwise_not(cv_frame) + inv_pil = Image.fromarray(cv2.cvtColor(inv_frame, cv2.COLOR_BGR2RGB)) + barcodes = decode(inv_pil) + + for bc in barcodes: + buf = dai.Buffer() + buf.setData(bc.data) + self.output.send(buf) + + if not barcodes: + time.sleep(0.001) diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/host_crop_config_creator.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/host_crop_config_creator.py new file mode 100644 index 000000000..bcee42f3b --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/host_crop_config_creator.py @@ -0,0 +1,275 @@ +from typing import Optional, Tuple +import time + +import depthai as dai + +from depthai_nodes import ImgDetectionExtended, ImgDetectionsExtended + + +class CropConfigsCreator(dai.node.HostNode): + """A node to create and send a dai.ImageManipConfigV2 crop configuration for each + detection in a list of detections. An optional target size and resize mode can be + set to ensure uniform crop sizes. + + To ensure correct synchronization between the crop configurations and the image, + ensure "inputConfig.setReusePreviousMessage" is set to False in the dai.ImageManipV2 node. + + Attributes + ---------- + detections_input : dai.Input + The input link for the ImageDetectionsExtended | dai.ImgDetections message. + config_output : dai.Output + The output link for the ImageManipConfigV2 messages. + detections_output : dai.Output + The output link for the ImgDetectionsExtended message. + source_size : Tuple[int, int] + The size of the source image (width, height). + target_size : Optional[Tuple[int, int]] = None + The size of the target image (width, height). If None, crop sizes will not be uniform. 
+ resize_mode : dai.ImageManipConfigV2.ResizeMode = dai.ImageManipConfigV2.ResizeMode.STRETCH + The resize mode to use when target size is set. Options are: CENTER_CROP, LETTERBOX, NONE, STRETCH. + """ + + def __init__(self) -> None: + """Initializes the node.""" + super().__init__() + self.config_output = self.createOutput( + possibleDatatypes=[ + dai.Node.DatatypeHierarchy(dai.DatatypeEnum.ImageManipConfig, True) + ] + ) + + self.detections_output = self.createOutput( + possibleDatatypes=[ + dai.Node.DatatypeHierarchy(dai.DatatypeEnum.Buffer, True) + ] + ) + self._w: int = None + self._h: int = None + self._target_w: int = None + self._target_h: int = None + self.resize_mode: dai.ImageManipConfig.ResizeMode = None + + @property + def w(self) -> int: + """Returns the width of the source image. + + @return: Width of the source image. + @rtype: int + """ + return self._w + + @property + def h(self) -> int: + """Returns the height of the source image. + + @return: Height of the source image. + @rtype: int + """ + return self._h + + @property + def target_w(self) -> int: + """Returns the width of the target image. + + @return: Width of the target image. + @rtype: int + """ + return self._target_w + + @property + def target_h(self) -> int: + """Returns the height of the target image. + + @return: Height of the target image. + @rtype: int + """ + return self._target_h + + @w.setter + def w(self, w: int): + """Sets the width of the source image. + + @param w: Width of the source image. + @type w: int + @raise TypeError: If w is not an integer. + @raise ValueError: If w is less than 1. + """ + self._validate_positive_integer(w) + self._w = w + + @h.setter + def h(self, h: int): + """Sets the height of the source image. + + @param h: Height of the source image. + @type h: int + @raise TypeError: If h is not an integer. + @raise ValueError: If h is less than 1. + """ + self._validate_positive_integer(h) + self._h = h + + @target_w.setter + def target_w(self, target_w: int): + """Sets the width of the target image. + + @param target_w: Width of the target image. + @type target_w: int + @raise TypeError: If target_w is not an integer. + @raise ValueError: If target_w is less than 1. + """ + self._validate_positive_integer(target_w) + self._target_w = target_w + + @target_h.setter + def target_h(self, target_h: int): + """Sets the height of the target image. + + @param target_h: Height of the target image. + @type target_h: int + @raise TypeError: If target_h is not an integer. + @raise ValueError: If target_h is less than 1. + """ + self._validate_positive_integer(target_h) + self._target_h = target_h + + def build( + self, + detections_input: dai.Node.Output, + source_size: Tuple[int, int], + target_size: Optional[Tuple[int, int]] = None, + resize_mode: dai.ImageManipConfig.ResizeMode = dai.ImageManipConfig.ResizeMode.STRETCH, + ) -> "CropConfigsCreator": + """Link the node input and set the correct source and target image sizes. + + Parameters + ---------- + detections_input : dai.Node.Output + The input link for the ImgDetectionsExtended message + source_size : Tuple[int, int] + The size of the source image (width, height). + target_size : Optional[Tuple[int, int]] + The size of the target image (width, height). If None, crop sizes will not be uniform. + resize_mode : dai.ImageManipConfigV2.ResizeMode = dai.ImageManipConfigV2.ResizeMode.STRETCH + The resize mode to use when target size is set. Options are: CENTER_CROP, LETTERBOX, NONE, STRETCH. 
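+
+        Examples
+        --------
+        Illustrative wiring, mirroring how main.py in this example uses the node
+        (the surrounding node and output names are placeholders):
+
+        >>> crop_creator = pipeline.create(CropConfigsCreator).build(
+        ...     detection_nn.out,
+        ...     source_size=(2592, 1944),
+        ...     target_size=(640, 480),
+        ...     resize_mode=dai.ImageManipConfig.ResizeMode.LETTERBOX,
+        ... )
+        >>> crop_manip = pipeline.create(dai.node.ImageManip)
+        >>> crop_manip.inputConfig.setReusePreviousMessage(False)
+        >>> crop_creator.config_output.link(crop_manip.inputConfig)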
+ """ + + self.w = source_size[0] + self.h = source_size[1] + + if target_size is not None: + self.target_w = target_size[0] + self.target_h = target_size[1] + + self.resize_mode = resize_mode + + self.link_args(detections_input) + + return self + + def process(self, detections_input: dai.Buffer) -> None: + """Process the input detections and create crop configurations. This function is + ran every time a new ImgDetectionsExtended or dai.ImgDetections message is + received. + + Sends len(detections) number of crop configurations to the config_output link. + In addition sends an ImgDetectionsExtended object containing the corresponding + detections to the detections_output link. + """ + + assert isinstance(detections_input, (ImgDetectionsExtended, dai.ImgDetections)) + sequence_num = detections_input.getSequenceNum() + timestamp = detections_input.getTimestamp() + + if isinstance(detections_input, dai.ImgDetections): + detections_msg = self._convert_to_extended(detections_input) + else: + detections_msg = detections_input + + detections = detections_msg.detections + + # Skip the current frame / load new frame + cfg = dai.ImageManipConfig() + cfg.setSkipCurrentImage(True) + cfg.setTimestamp(timestamp) + cfg.setSequenceNum(sequence_num) + send_status = False + attempts = 0 + while ( + not send_status and attempts < 100 + ): # Limit attempts to prevent infinite loop + send_status = self.config_output.trySend(cfg) + if not send_status: + attempts += 1 + time.sleep(0.001) # Small delay to prevent busy waiting + + for i in range(len(detections)): + cfg = dai.ImageManipConfig() + detection: ImgDetectionExtended = detections[i] + rect = detection.rotated_rect + rect = rect.denormalize(self.w, self.h) + + cfg.addCropRotatedRect(rect, normalizedCoords=False) + + if self.target_w is not None and self.target_h is not None: + cfg.setOutputSize(self.target_w, self.target_h, self.resize_mode) + + cfg.setReusePreviousImage(True) + cfg.setTimestamp(timestamp) + cfg.setSequenceNum(sequence_num) + + send_status = False + attempts = 0 + while ( + not send_status and attempts < 100 + ): # Limit attempts to prevent infinite loop + send_status = self.config_output.trySend(cfg) + if not send_status: + attempts += 1 + time.sleep(0.001) # Small delay to prevent busy waiting + + self.detections_output.send(detections_msg) + + def _convert_to_extended( + self, detections: dai.ImgDetections + ) -> ImgDetectionsExtended: + rotated_rectangle_detections = [] + for det in detections.detections: + img_detection = ImgDetectionExtended() + img_detection.label = det.label + img_detection.confidence = det.confidence + + x_center = (det.xmin + det.xmax) / 2 + y_center = (det.ymin + det.ymax) / 2 + width = det.xmax - det.xmin + height = det.ymax - det.ymin + width = width * 1.15 + height = height * 1.15 + + img_detection.rotated_rect = (x_center, y_center, width, height, 0.0) + + rotated_rectangle_detections.append(img_detection) + + img_detections_extended = ImgDetectionsExtended() + img_detections_extended.setSequenceNum(detections.getSequenceNum()) + img_detections_extended.setTimestamp(detections.getTimestamp()) + img_detections_extended.detections = rotated_rectangle_detections + transformation = detections.getTransformation() + if transformation is not None: + img_detections_extended.setTransformation(transformation) + + return img_detections_extended + + def _validate_positive_integer(self, value: int): + """Validates that the set size is a positive integer. + + @param value: The value to validate. 
+ @type value: int + @raise TypeError: If value is not an integer. + @raise ValueError: If value is less than 1. + """ + if not isinstance(value, int): + raise TypeError("Value must be an integer.") + if value < 1: + raise ValueError("Value must be greater than 1.") diff --git a/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/simple_barcode_overlay.py b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/simple_barcode_overlay.py new file mode 100644 index 000000000..3a99ee463 --- /dev/null +++ b/neural-networks/object-detection/barcode-detection-conveyor-belt/utils/simple_barcode_overlay.py @@ -0,0 +1,271 @@ +import cv2 +import depthai as dai +from threading import Lock +import time + + +class SimpleBarcodeOverlay(dai.node.HostNode): + """ + Simple overlay that shows the most recently decoded barcode on the video stream and detection boxes. + """ + + def __init__(self): + super().__init__() + + self.barcodes = {} + self.latest_detections = None + self.lock = Lock() + self.max_display_time = 3.0 + self.max_barcodes = 5 + self.input_barcode = self.createInput() + self.output = self.createOutput() + self.output.setPossibleDatatypes([(dai.DatatypeEnum.ImgFrame, True)]) + + def build( + self, + barcode_source: dai.Node.Output, + video_source: dai.Node.Output, + detection_source: dai.Node.Output = None, + ) -> "SimpleBarcodeOverlay": + self.link_args(video_source, detection_source) + barcode_source.link(self.input_barcode) + + self.sendProcessingToPipeline(True) + return self + + def process( + self, video_msg: dai.ImgFrame, detection_msg: dai.Buffer = None + ) -> dai.ImgFrame: + try: + with self.lock: + current_time = time.time() + while self.input_barcode.has(): + barcode_msg = self.input_barcode.get() + data = barcode_msg.getData() + + if isinstance(data, bytes): + barcode_text = data.decode("utf-8", errors="ignore") + elif hasattr(data, "tobytes"): + barcode_text = data.tobytes().decode("utf-8", errors="ignore") + else: + barcode_text = str(data) + + self.barcodes[barcode_text] = current_time + + self.barcodes = { + code: timestamp + for code, timestamp in self.barcodes.items() + if current_time - timestamp < self.max_display_time + } + + if len(self.barcodes) > self.max_barcodes: + sorted_barcodes = sorted( + self.barcodes.items(), key=lambda x: x[1], reverse=True + ) + self.barcodes = dict(sorted_barcodes[: self.max_barcodes]) + except Exception as e: + pass + + try: + with self.lock: + self.latest_detections = detection_msg + except Exception as e: + pass + + try: + frame = video_msg.getCvFrame() + + with self.lock: + current_time = time.time() + + if self.latest_detections: + self._draw_detection_boxes(frame, self.latest_detections) + + active_barcodes = { + code: timestamp + for code, timestamp in self.barcodes.items() + if current_time - timestamp < self.max_display_time + } + self.barcodes = active_barcodes + + if active_barcodes: + self._draw_multiple_barcodes(frame, active_barcodes, current_time) + + output_msg = dai.ImgFrame() + output_msg.setData(frame) + output_msg.setTimestamp(video_msg.getTimestamp()) + output_msg.setSequenceNum(video_msg.getSequenceNum()) + output_msg.setWidth(frame.shape[1]) + output_msg.setHeight(frame.shape[0]) + output_msg.setType(video_msg.getType()) + + # Send the frame to the node's output so it appears in the DepthAI Visualizer + self.output.send(output_msg) + + except Exception as e: + pass + + def _draw_multiple_barcodes(self, frame, active_barcodes, current_time): + """Draw multiple barcode texts with 
prettier styling""" + try: + h, w = frame.shape[:2] + + sorted_barcodes = sorted( + active_barcodes.items(), key=lambda x: x[1], reverse=True + ) + + panel_width = int(w * 0.35) + panel_x = w - panel_width - 20 + panel_y_start = 20 + + panel_height = min(len(sorted_barcodes) * 80 + 40, h - 40) + cv2.rectangle( + frame, + (panel_x - 15, panel_y_start), + (panel_x + panel_width + 15, panel_y_start + panel_height), + (0, 0, 0), + -1, + ) + + cv2.rectangle( + frame, + (panel_x - 15, panel_y_start), + (panel_x + panel_width + 15, panel_y_start + panel_height), + (100, 200, 255), + 3, + ) + + header_text = f"Detected Barcodes ({len(sorted_barcodes)})" + cv2.putText( + frame, + header_text, + (panel_x, panel_y_start + 25), + cv2.FONT_HERSHEY_SIMPLEX, + 0.7, + (100, 200, 255), + 2, + ) + + cv2.line( + frame, + (panel_x, panel_y_start + 35), + (panel_x + panel_width, panel_y_start + 35), + (100, 200, 255), + 2, + ) + + for i, (barcode_text, timestamp) in enumerate(sorted_barcodes): + if i >= self.max_barcodes: + break + + y_pos = panel_y_start + 60 + (i * 70) + if y_pos > h - 50: + break + + age = current_time - timestamp + alpha = max(0.3, 1.0 - (age / self.max_display_time)) + + if age < 1.0: + color = (0, 255, 0) + bg_color = (0, 50, 0) + elif age < 2.0: + color = (0, 255, 255) + bg_color = (0, 50, 50) + else: + color = (0, 150, 255) + bg_color = (0, 25, 50) + + color = tuple(int(c * alpha) for c in color) + + cv2.rectangle( + frame, + (panel_x, y_pos - 25), + (panel_x + panel_width, y_pos + 15), + bg_color, + -1, + ) + + display_text = barcode_text + if len(display_text) > 18: + display_text = display_text[:15] + "..." + + cv2.putText( + frame, + display_text, + (panel_x + 10, y_pos), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + color, + 2, + ) + + age_text = f"{age:.1f}s" + cv2.putText( + frame, + age_text, + (panel_x + panel_width - 50, y_pos - 8), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (150, 150, 150), + 1, + ) + + dot_color = ( + (0, 255, 0) + if age < 1.0 + else (0, 255, 255) + if age < 2.0 + else (0, 150, 255) + ) + cv2.circle(frame, (panel_x + 5, y_pos - 5), 4, dot_color, -1) + + except Exception as e: + pass + + def _draw_detection_boxes(self, frame, detections): + """Draw detection bounding boxes on the frame""" + try: + h, w = frame.shape[:2] + + for detection in detections.detections: + xmin, ymin, xmax, ymax = detection.rotated_rect.getOuterRect() + + x1 = int(xmin * w) + y1 = int(ymin * h) + x2 = int(xmax * w) + y2 = int(ymax * h) + + x1 = max(0, min(w - 1, x1)) + y1 = max(0, min(h - 1, y1)) + x2 = max(0, min(w - 1, x2)) + y2 = max(0, min(h - 1, y2)) + + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + confidence = detection.confidence + label = f"Barcode: {confidence:.2f}" + + (text_width, text_height), baseline = cv2.getTextSize( + label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2 + ) + + cv2.rectangle( + frame, + (x1, y1 - text_height - 10), + (x1 + text_width, y1), + (0, 255, 0), + -1, + ) + + cv2.putText( + frame, + label, + (x1, y1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (0, 0, 0), + 2, + ) + + except Exception as e: + pass