diff --git a/481_WHC/LICENSE b/481_WHC/LICENSE new file mode 100644 index 0000000000..6058ee5e16 --- /dev/null +++ b/481_WHC/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Katsuya Hyodo + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/481_WHC/README.md b/481_WHC/README.md new file mode 100644 index 0000000000..c27d8519e0 --- /dev/null +++ b/481_WHC/README.md @@ -0,0 +1,81 @@ +# 481_WHC +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17690769.svg)](https://doi.org/10.5281/zenodo.17690769) ![GitHub License](https://img.shields.io/github/license/pinto0309/WHC) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/PINTO0309/whc) + +Waving Hand Classification. Ultrafast 1x3x4x32x32 3DConv gesture estimation. + +https://github.com/user-attachments/assets/c6b38d56-48b7-4609-bae1-f607c21ba423 + +https://github.com/user-attachments/assets/7e9f8763-839f-46d2-98b1-320170f8ed10 + +|Variant|Size|Seq|F1|CPU
inference<br>latency|ONNX<br>static seq|ONNX
dynamic seq| +|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +|S|1.1 MB|4|0.9821|0.31 ms|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_4x32x32.onnx)|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_T4x32x32.onnx)| +|M|1.1 MB|6|0.9916|0.46 ms|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_6x32x32.onnx)|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_T6x32x32.onnx)| +|L|1.1 MB|8|0.9940|0.37 ms|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_8x32x32.onnx)|[Download](https://github.com/PINTO0309/WHC/releases/download/onnx/whc_seq_3dcnn_T8x32x32.onnx)| + +## Data sample + +|1|2|3|4| +|:-:|:-:|:-:|:-:| +|image|image|image|image| + +## Inference + +```bash +uv run python demo_whc.py \ +-wm whc_seq_3dcnn_4x32x32.onnx \ +-v 0 \ +-ep cuda \ +-dlr -dnm -dgm -dhm -dhd + +uv run python demo_whc.py \ +-wm whc_seq_3dcnn_4x32x32.onnx \ +-v 0 \ +-ep tensorrt \ +-dlr -dnm -dgm -dhm -dhd +``` + +## Arch + +whc_seq_3dcnn_4x32x32 + +## Ultra-lightweight classification model series +1. [VSDLM: Visual-only speech detection driven by lip movements](https://github.com/PINTO0309/VSDLM) - MIT License +2. [OCEC: Open closed eyes classification. Ultra-fast wink and blink estimation model](https://github.com/PINTO0309/OCEC) - MIT License +3. [PGC: Ultrafast pointing gesture classification](https://github.com/PINTO0309/PGC) - MIT License +4. [SC: Ultrafast sitting classification](https://github.com/PINTO0309/SC) - MIT License +5. [PUC: Phone Usage Classifier is a three-class image classification pipeline for understanding how people +interact with smartphones](https://github.com/PINTO0309/PUC) - MIT License +6. [HSC: Happy smile classifier](https://github.com/PINTO0309/HSC) - MIT License +7. [WHC: Waving Hand Classification](https://github.com/PINTO0309/WHC) - MIT License + +## Citation + +If you find this project useful, please consider citing: + +```bibtex +@software{hyodo2025whc, + author = {Katsuya Hyodo}, + title = {PINTO0309/WHC}, + month = {11}, + year = {2025}, + publisher = {Zenodo}, + doi = {10.5281/zenodo.17690769}, + url = {https://github.com/PINTO0309/whc}, + abstract = {Waving Hand Classification.}, +} +``` + +## Acknowledgments + +- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/472_DEIMv2-Wholebody34: Apache 2.0 License + ```bibtex + @software{DEIMv2-Wholebody34, + author={Katsuya Hyodo}, + title={Lightweight human detection models generated on high-quality human data sets. 
It can detect objects with high accuracy and speed in a total of 28 classes: body, adult, child, male, female, body_with_wheelchair, body_with_crutches, head, front, right-front, right-side, right-back, back, left-back, left-side, left-front, face, eye, nose, mouth, ear, collarbone, shoulder, solar_plexus, elbow, wrist, hand, hand_left, hand_right, abdomen, hip_joint, knee, ankle, foot.}, + url={https://github.com/PINTO0309/PINTO_model_zoo/tree/main/472_DEIMv2-Wholebody34}, + year={2025}, + month={10}, + doi={10.5281/zenodo.17625710} + } + ``` diff --git a/481_WHC/demo/demo_hsc.py b/481_WHC/demo/demo_hsc.py new file mode 100644 index 0000000000..d44fe59a2e --- /dev/null +++ b/481_WHC/demo/demo_hsc.py @@ -0,0 +1,2030 @@ +#!/usr/bin/env python + +from __future__ import annotations +import warnings +warnings.filterwarnings('ignore') +import os +import sys +import copy +import cv2 +import math +import time +from pprint import pprint +import numpy as np +from enum import Enum +from pathlib import Path +from dataclasses import dataclass +from argparse import ArgumentParser, ArgumentTypeError +from typing import Tuple, Optional, List, Dict, Any, Deque +import importlib.util +from collections import Counter, deque +from abc import ABC, abstractmethod + +from bbalg.main import state_verdict + +AVERAGE_HEAD_WIDTH: float = 0.16 + 0.10 # 16cm + Margin Compensation + +BOX_COLORS = [ + [(216, 67, 21),"Front"], + [(255, 87, 34),"Right-Front"], + [(123, 31, 162),"Right-Side"], + [(255, 193, 7),"Right-Back"], + [(76, 175, 80),"Back"], + [(33, 150, 243),"Left-Back"], + [(156, 39, 176),"Left-Side"], + [(0, 188, 212),"Left-Front"], +] + +# The pairs of classes you want to join +# (there is some overlap because there are left and right classes) +EDGES = [ + (21, 22), (21, 22), # collarbone -> shoulder (left and right) + (21, 23), # collarbone -> solar_plexus + (22, 24), (22, 24), # shoulder -> elbow (left and right) + (22, 30), (22, 30), # shoulder -> hip_joint (left and right) + (24, 25), (24, 25), # elbow -> wrist (left and right) + (23, 29), # solar_plexus -> abdomen + (29, 30), (29, 30), # abdomen -> hip_joint (left and right) + (30, 31), (30, 31), # hip_joint -> knee (left and right) + (31, 32), (31, 32), # knee -> ankle (left and right) +] + +BODY_LONG_HISTORY_SIZE = 10 +BODY_SHORT_HISTORY_SIZE = 6 +SMILING_LABEL = '!! Smiling !!' 
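# --- Illustrative sketch (not called by the demo) ---------------------------
# The two history sizes above feed bbalg.state_verdict(), which smooths the
# noisy per-frame smiling / not-smiling decisions into a stable interval
# judgment per tracked box. The helper below mirrors how main() drives it for
# a single track; the fake probability stream and the 0.50 threshold are
# assumptions for illustration only, not part of the pipeline.
def _example_state_verdict_usage() -> None:
    from collections import deque
    from bbalg.main import state_verdict

    long_hist: deque = deque(maxlen=BODY_LONG_HISTORY_SIZE)
    short_hist: deque = deque(maxlen=BODY_SHORT_HISTORY_SIZE)
    # Pretend per-frame classifier outputs for one tracked head (assumed values).
    for prob_smiling in [0.1, 0.2, 0.8, 0.9, 0.85, 0.9, 0.7, 0.95, 0.9, 0.2]:
        is_smiling = prob_smiling >= 0.50
        long_hist.append(is_smiling)
        short_hist.append(is_smiling)
        interval_judgment, start_judgment, end_judgment = state_verdict(
            long_tracking_history=long_hist,
            short_tracking_history=short_hist,
        )
        # `interval_judgment` is what toggles the SMILING_LABEL overlay in the
        # demo; `start_judgment` / `end_judgment` mark the rising and falling
        # edges of the smoothed state.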
+SMILING_COLOR = (0, 210, 0) # readable green for smiling label/bounding boxes + +class Color(Enum): + BLACK = '\033[30m' + RED = '\033[31m' + GREEN = '\033[32m' + YELLOW = '\033[33m' + BLUE = '\033[34m' + MAGENTA = '\033[35m' + CYAN = '\033[36m' + WHITE = '\033[37m' + COLOR_DEFAULT = '\033[39m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + INVISIBLE = '\033[08m' + REVERSE = '\033[07m' + BG_BLACK = '\033[40m' + BG_RED = '\033[41m' + BG_GREEN = '\033[42m' + BG_YELLOW = '\033[43m' + BG_BLUE = '\033[44m' + BG_MAGENTA = '\033[45m' + BG_CYAN = '\033[46m' + BG_WHITE = '\033[47m' + BG_DEFAULT = '\033[49m' + RESET = '\033[0m' + + def __str__(self): + return self.value + + def __call__(self, s): + return str(self) + str(s) + str(Color.RESET) + +@dataclass(frozen=False) +class Box(): + classid: int + score: float + x1: int + y1: int + x2: int + y2: int + cx: int + cy: int + generation: int = -1 # -1: Unknown, 0: Adult, 1: Child + gender: int = -1 # -1: Unknown, 0: Male, 1: Female + handedness: int = -1 # -1: Unknown, 0: Left, 1: Right + head_pose: int = -1 # -1: Unknown, 0: Front, 1: Right-Front, 2: Right-Side, 3: Right-Back, 4: Back, 5: Left-Back, 6: Left-Side, 7: Left-Front + is_used: bool = False + person_id: int = -1 + track_id: int = -1 + body_prob_sitting: float = -1.0 + body_state: int = -1 # -1: Unknown, 0: not_sitting, 1: sitting + body_label: str = '' + head_prob_smiling: float = -1.0 + head_state: int = -1 # -1: Unknown, 0: not_smiling, 1: smiling + head_label: str = '' + + +class BodyStateHistory: + def __init__(self, long_size: int, short_size: int) -> None: + self.long_history: Deque[bool] = deque(maxlen=long_size) + self.short_history: Deque[bool] = deque(maxlen=short_size) + self.label: str = '' + self.interval_active: bool = False + + def append(self, state: bool) -> None: + """Push latest per-frame boolean into both buffers.""" + self.long_history.append(state) + self.short_history.append(state) + +class SimpleSortTracker: + """Minimal SORT-style tracker based on IoU matching.""" + + def __init__(self, iou_threshold: float = 0.3, max_age: int = 30) -> None: + self.iou_threshold = iou_threshold + self.max_age = max_age + self.next_track_id = 1 + self.tracks: List[Dict[str, Any]] = [] + self.frame_index = 0 + + @staticmethod + def _iou(bbox_a: Tuple[int, int, int, int], bbox_b: Tuple[int, int, int, int]) -> float: + ax1, ay1, ax2, ay2 = bbox_a + bx1, by1, bx2, by2 = bbox_b + + inter_x1 = max(ax1, bx1) + inter_y1 = max(ay1, by1) + inter_x2 = min(ax2, bx2) + inter_y2 = min(ay2, by2) + + inter_w = max(0, inter_x2 - inter_x1) + inter_h = max(0, inter_y2 - inter_y1) + if inter_w == 0 or inter_h == 0: + return 0.0 + + inter_area = inter_w * inter_h + area_a = max(0, ax2 - ax1) * max(0, ay2 - ay1) + area_b = max(0, bx2 - bx1) * max(0, by2 - by1) + union = area_a + area_b - inter_area + if union <= 0: + return 0.0 + return float(inter_area / union) + + def update(self, boxes: List[Box]) -> None: + self.frame_index += 1 + + for box in boxes: + box.track_id = -1 + + if not boxes and not self.tracks: + return + + iou_matrix = None + if self.tracks and boxes: + iou_matrix = np.zeros((len(self.tracks), len(boxes)), dtype=np.float32) + for t_idx, track in enumerate(self.tracks): + track_bbox = track['bbox'] + for d_idx, box in enumerate(boxes): + det_bbox = (box.x1, box.y1, box.x2, box.y2) + iou_matrix[t_idx, d_idx] = self._iou(track_bbox, det_bbox) + + matched_tracks: set[int] = set() + matched_detections: set[int] = set() + matches: List[Tuple[int, int]] = [] + + if iou_matrix is not None and 
iou_matrix.size > 0: + while True: + best_track = -1 + best_det = -1 + best_iou = self.iou_threshold + for t_idx in range(len(self.tracks)): + if t_idx in matched_tracks: + continue + for d_idx in range(len(boxes)): + if d_idx in matched_detections: + continue + iou = float(iou_matrix[t_idx, d_idx]) + if iou > best_iou: + best_iou = iou + best_track = t_idx + best_det = d_idx + if best_track == -1: + break + matched_tracks.add(best_track) + matched_detections.add(best_det) + matches.append((best_track, best_det)) + + for t_idx, d_idx in matches: + track = self.tracks[t_idx] + det_box = boxes[d_idx] + track['bbox'] = (det_box.x1, det_box.y1, det_box.x2, det_box.y2) + track['missed'] = 0 + track['last_seen'] = self.frame_index + det_box.track_id = track['id'] + + surviving_tracks: List[Dict[str, Any]] = [] + for idx, track in enumerate(self.tracks): + if idx in matched_tracks: + surviving_tracks.append(track) + continue + track['missed'] += 1 + if track['missed'] <= self.max_age: + surviving_tracks.append(track) + self.tracks = surviving_tracks + + for d_idx, det_box in enumerate(boxes): + if d_idx in matched_detections: + continue + track_id = self.next_track_id + self.next_track_id += 1 + det_box.track_id = track_id + self.tracks.append( + { + 'id': track_id, + 'bbox': (det_box.x1, det_box.y1, det_box.x2, det_box.y2), + 'missed': 0, + 'last_seen': self.frame_index, + } + ) + + if not boxes: + return + +class AbstractModel(ABC): + """AbstractModel + Base class of the model. + """ + _runtime: str = 'onnx' + _model_path: str = '' + _obj_class_score_th: float = 0.35 + _attr_class_score_th: float = 0.70 + _input_shapes: List[List[int]] = [] + _input_names: List[str] = [] + _output_shapes: List[List[int]] = [] + _output_names: List[str] = [] + + # onnx/tflite + _interpreter = None + _inference_model = None + _providers = None + _swap = (2, 0, 1) + _h_index = 2 + _w_index = 3 + + # onnx + _onnx_dtypes_to_np_dtypes = { + "tensor(float)": np.float32, + "tensor(uint8)": np.uint8, + "tensor(int8)": np.int8, + } + + # tflite + _input_details = None + _output_details = None + + @abstractmethod + def __init__( + self, + *, + runtime: Optional[str] = 'onnx', + model_path: Optional[str] = '', + obj_class_score_th: Optional[float] = 0.35, + attr_class_score_th: Optional[float] = 0.70, + keypoint_th: Optional[float] = 0.25, + providers: Optional[List] = [ + ( + 'TensorrtExecutionProvider', { + 'trt_engine_cache_enable': True, + 'trt_engine_cache_path': '.', + 'trt_fp16_enable': True, + # onnxruntime>=1.21.0 breaking changes + # https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#data-dependant-shape-dds-ops + # https://github.com/microsoft/onnxruntime/pull/22681/files + # https://github.com/microsoft/onnxruntime/pull/23893/files + 'trt_op_types_to_exclude': 'NonMaxSuppression,NonZero,RoiAlign', + } + ), + 'CUDAExecutionProvider', + 'CPUExecutionProvider', + ], + ): + self._runtime = runtime + self._model_path = model_path + self._obj_class_score_th = obj_class_score_th + self._attr_class_score_th = attr_class_score_th + self._keypoint_th = keypoint_th + self._providers = providers + + # Model loading + if self._runtime == 'onnx': + import onnxruntime # type: ignore + onnxruntime.set_default_logger_severity(3) # ERROR + session_option = onnxruntime.SessionOptions() + session_option.log_severity_level = 3 + self._interpreter = \ + onnxruntime.InferenceSession( + model_path, + sess_options=session_option, + providers=providers, + ) + self._providers = 
self._interpreter.get_providers() + print(f'{Color.GREEN("Enabled ONNX ExecutionProviders:")}') + pprint(f'{self._providers}') + + self._input_names = [ + input.name for input in self._interpreter.get_inputs() + ] + self._input_dtypes = [ + self._onnx_dtypes_to_np_dtypes[input.type] for input in self._interpreter.get_inputs() + ] + self._output_shapes = [ + output.shape for output in self._interpreter.get_outputs() + ] + self._output_names = [ + output.name for output in self._interpreter.get_outputs() + ] + self._model = self._interpreter.run + self._swap = (2, 0, 1) + self._h_index = 2 + self._w_index = 3 + + elif self._runtime in ['ai_edge_litert', 'tensorflow']: + if self._runtime == 'ai_edge_litert': + from ai_edge_litert.interpreter import Interpreter + self._interpreter = Interpreter(model_path=model_path) + elif self._runtime == 'tensorflow': + import tensorflow as tf # type: ignore + self._interpreter = tf.lite.Interpreter(model_path=model_path) + self._input_details = self._interpreter.get_input_details() + self._output_details = self._interpreter.get_output_details() + self._input_names = [ + input.get('name', None) for input in self._input_details + ] + self._input_dtypes = [ + input.get('dtype', None) for input in self._input_details + ] + self._output_shapes = [ + output.get('shape', None) for output in self._output_details + ] + self._output_names = [ + output.get('name', None) for output in self._output_details + ] + self._model = self._interpreter.get_signature_runner() + self._swap = (0, 1, 2) + self._h_index = 1 + self._w_index = 2 + + @abstractmethod + def __call__( + self, + *, + input_datas: List[np.ndarray], + ) -> List[np.ndarray]: + datas = { + f'{input_name}': input_data \ + for input_name, input_data in zip(self._input_names, input_datas) + } + if self._runtime == 'onnx': + outputs = [ + output for output in \ + self._model( + output_names=self._output_names, + input_feed=datas, + ) + ] + return outputs + elif self._runtime in ['ai_edge_litert', 'tensorflow']: + outputs = [ + output for output in \ + self._model( + **datas + ).values() + ] + return outputs + + @abstractmethod + def _preprocess( + self, + *, + image: np.ndarray, + swap: Optional[Tuple[int,int,int]] = (2, 0, 1), + ) -> np.ndarray: + raise NotImplementedError() + + @abstractmethod + def _postprocess( + self, + *, + image: np.ndarray, + boxes: np.ndarray, + ) -> List[Box]: + raise NotImplementedError() + +class DEIMv2(AbstractModel): + def __init__( + self, + *, + runtime: Optional[str] = 'onnx', + model_path: Optional[str] = 'deimv2_dinov3_x_wholebody34_1750query_n_batch_640x640.onnx', + obj_class_score_th: Optional[float] = 0.35, + attr_class_score_th: Optional[float] = 0.70, + keypoint_th: Optional[float] = 0.35, + providers: Optional[List] = None, + ): + """ + + Parameters + ---------- + runtime: Optional[str] + Runtime for DEIMv2. Default: onnx + + model_path: Optional[str] + ONNX/TFLite file path for DEIMv2 + + obj_class_score_th: Optional[float] + Object score threshold. Default: 0.35 + + attr_class_score_th: Optional[float] + Attributes score threshold. Default: 0.70 + + keypoint_th: Optional[float] + Keypoints score threshold. Default: 0.35 + + providers: Optional[List] + Providers for ONNXRuntime. 
+ """ + super().__init__( + runtime=runtime, + model_path=model_path, + obj_class_score_th=obj_class_score_th, + attr_class_score_th=attr_class_score_th, + keypoint_th=keypoint_th, + providers=providers, + ) + self.mean: np.ndarray = np.asarray([0.485, 0.456, 0.406], dtype=np.float32).reshape([3,1,1]) # Not used in DEIMv2 + self.std: np.ndarray = np.asarray([0.229, 0.224, 0.225], dtype=np.float32).reshape([3,1,1]) # Not used in DEIMv2 + + def __call__( + self, + image: np.ndarray, + disable_generation_identification_mode: bool, + disable_gender_identification_mode: bool, + disable_left_and_right_hand_identification_mode: bool, + disable_headpose_identification_mode: bool, + ) -> List[Box]: + """ + + Parameters + ---------- + image: np.ndarray + Entire image + + disable_generation_identification_mode: bool + + disable_gender_identification_mode: bool + + disable_left_and_right_hand_identification_mode: bool + + disable_headpose_identification_mode: bool + + Returns + ------- + result_boxes: List[Box] + Predicted boxes: [classid, score, x1, y1, x2, y2, cx, cy, atrributes, is_used=False] + """ + temp_image = copy.deepcopy(image) + # PreProcess + resized_image = \ + self._preprocess( + temp_image, + ) + # Inference + inferece_image = np.asarray([resized_image], dtype=self._input_dtypes[0]) + outputs = super().__call__(input_datas=[inferece_image]) + boxes = outputs[0][0] + # PostProcess + result_boxes = \ + self._postprocess( + image=temp_image, + boxes=boxes, + disable_generation_identification_mode=disable_generation_identification_mode, + disable_gender_identification_mode=disable_gender_identification_mode, + disable_left_and_right_hand_identification_mode=disable_left_and_right_hand_identification_mode, + disable_headpose_identification_mode=disable_headpose_identification_mode, + ) + return result_boxes + + def _preprocess( + self, + image: np.ndarray, + ) -> np.ndarray: + """_preprocess + + Parameters + ---------- + image: np.ndarray + Entire image + + Returns + ------- + resized_image: np.ndarray + Resized and normalized image. + """ + image = image.transpose(self._swap) + image = \ + np.ascontiguousarray( + image, + dtype=np.float32, + ) + return image + + def _postprocess( + self, + image: np.ndarray, + boxes: np.ndarray, + disable_generation_identification_mode: bool, + disable_gender_identification_mode: bool, + disable_left_and_right_hand_identification_mode: bool, + disable_headpose_identification_mode: bool, + ) -> List[Box]: + """_postprocess + + Parameters + ---------- + image: np.ndarray + Entire image. + + boxes: np.ndarray + float32[N, 7]. [instances, [batchno, classid, score, x1, y1, x2, y2]]. 
+ + disable_generation_identification_mode: bool + + disable_gender_identification_mode: bool + + disable_left_and_right_hand_identification_mode: bool + + disable_headpose_identification_mode: bool + + Returns + ------- + result_boxes: List[Box] + Predicted boxes: [classid, score, x1, y1, x2, y2, cx, cy, attributes, is_used=False] + """ + image_height = image.shape[0] + image_width = image.shape[1] + + result_boxes: List[Box] = [] + + box_score_threshold: float = min([self._obj_class_score_th, self._attr_class_score_th, self._keypoint_th]) + + if len(boxes) > 0: + scores = boxes[:, 5:6] + keep_idxs = scores[:, 0] > box_score_threshold + scores_keep = scores[keep_idxs, :] + boxes_keep = boxes[keep_idxs, :] + + if len(boxes_keep) > 0: + # Object filter + for box, score in zip(boxes_keep, scores_keep): + classid = int(box[0]) + x_min = int(max(0, box[1]) * image_width) + y_min = int(max(0, box[2]) * image_height) + x_max = int(min(box[3], 1.0) * image_width) + y_max = int(min(box[4], 1.0) * image_height) + cx = (x_min + x_max) // 2 + cy = (y_min + y_max) // 2 + result_boxes.append( + Box( + classid=classid, + score=float(score), + x1=x_min, + y1=y_min, + x2=x_max, + y2=y_max, + cx=cx, + cy=cy, + generation=-1, # -1: Unknown, 0: Adult, 1: Child + gender=-1, # -1: Unknown, 0: Male, 1: Female + handedness=-1, # -1: Unknown, 0: Left, 1: Right + head_pose=-1, # -1: Unknown, 0: Front, 1: Right-Front, 2: Right-Side, 3: Right-Back, 4: Back, 5: Left-Back, 6: Left-Side, 7: Left-Front + ) + ) + # Object filter + result_boxes = [ + box for box in result_boxes \ + if (box.classid in [0,5,6,7,16,17,18,19,20,26,27,28,33] and box.score >= self._obj_class_score_th) or box.classid not in [0,5,6,7,16,17,18,19,20,26,27,28,33] + ] + # Attribute filter + result_boxes = [ + box for box in result_boxes \ + if (box.classid in [1,2,3,4,8,9,10,11,12,13,14,15] and box.score >= self._attr_class_score_th) or box.classid not in [1,2,3,4,8,9,10,11,12,13,14,15] + ] + # Keypoint filter + result_boxes = [ + box for box in result_boxes \ + if (box.classid in [21,22,23,24,25,29,30,31,32] and box.score >= self._keypoint_th) or box.classid not in [21,22,23,24,25,29,30,31,32] + ] + + # Adult, Child merge + # classid: 0 -> Body + # classid: 1 -> Adult + # classid: 2 -> Child + # 1. Calculate Adult and Child IoUs for Body detection results + # 2. Connect either the Adult or the Child with the highest score and the highest IoU with the Body. + # 3. Exclude Adult and Child from detection results + if not disable_generation_identification_mode: + body_boxes = [box for box in result_boxes if box.classid == 0] + generation_boxes = [box for box in result_boxes if box.classid in [1, 2]] + self._find_most_relevant_obj(base_objs=body_boxes, target_objs=generation_boxes) + result_boxes = [box for box in result_boxes if box.classid not in [1, 2]] + # Male, Female merge + # classid: 0 -> Body + # classid: 3 -> Male + # classid: 4 -> Female + # 1. Calculate Male and Female IoUs for Body detection results + # 2. Connect either the Male or the Female with the highest score and the highest IoU with the Body. + # 3. 
Exclude Male and Female from detection results + if not disable_gender_identification_mode: + body_boxes = [box for box in result_boxes if box.classid == 0] + gender_boxes = [box for box in result_boxes if box.classid in [3, 4]] + self._find_most_relevant_obj(base_objs=body_boxes, target_objs=gender_boxes) + result_boxes = [box for box in result_boxes if box.classid not in [3, 4]] + # HeadPose merge + # classid: 7 -> Head + # classid: 8 -> Front + # classid: 9 -> Right-Front + # classid: 10 -> Right-Side + # classid: 11 -> Right-Back + # classid: 12 -> Back + # classid: 13 -> Left-Back + # classid: 14 -> Left-Side + # classid: 15 -> Left-Front + # 1. Calculate HeadPose IoUs for Head detection results + # 2. Connect either the HeadPose with the highest score and the highest IoU with the Head. + # 3. Exclude HeadPose from detection results + if not disable_headpose_identification_mode: + head_boxes = [box for box in result_boxes if box.classid == 7] + headpose_boxes = [box for box in result_boxes if box.classid in [8,9,10,11,12,13,14,15]] + self._find_most_relevant_obj(base_objs=head_boxes, target_objs=headpose_boxes) + result_boxes = [box for box in result_boxes if box.classid not in [8,9,10,11,12,13,14,15]] + # Left and right hand merge + # classid: 23 -> Hand + # classid: 24 -> Left-Hand + # classid: 25 -> Right-Hand + # 1. Calculate Left-Hand and Right-Hand IoUs for Hand detection results + # 2. Connect either the Left-Hand or the Right-Hand with the highest score and the highest IoU with the Hand. + # 3. Exclude Left-Hand and Right-Hand from detection results + if not disable_left_and_right_hand_identification_mode: + hand_boxes = [box for box in result_boxes if box.classid == 26] + left_right_hand_boxes = [box for box in result_boxes if box.classid in [27, 28]] + self._find_most_relevant_obj(base_objs=hand_boxes, target_objs=left_right_hand_boxes) + result_boxes = [box for box in result_boxes if box.classid not in [27, 28]] + + # Keypoints NMS + # Suppression of overdetection + # classid: 21 -> collarbone + # classid: 22 -> shoulder + # classid: 23 -> solar_plexus + # classid: 24 -> elbow + # classid: 25 -> wrist + # classid: 29 -> abdomen + # classid: 30 -> hip_joint + # classid: 31 -> knee + # classid: 32 -> ankle + for target_classid in [21,22,23,24,25,29,30,31,32]: + keypoints_boxes = [box for box in result_boxes if box.classid == target_classid] + filtered_keypoints_boxes = self._nms(target_objs=keypoints_boxes, iou_threshold=0.20) + result_boxes = [box for box in result_boxes if box.classid != target_classid] + result_boxes = result_boxes + filtered_keypoints_boxes + return result_boxes + + def _find_most_relevant_obj( + self, + *, + base_objs: List[Box], + target_objs: List[Box], + ): + for base_obj in base_objs: + most_relevant_obj: Box = None + best_score = 0.0 + best_iou = 0.0 + best_distance = float('inf') + + for target_obj in target_objs: + distance = ((base_obj.cx - target_obj.cx)**2 + (base_obj.cy - target_obj.cy)**2)**0.5 + # Process only unused objects with center Euclidean distance less than or equal to 10.0 + if not target_obj.is_used and distance <= 10.0: + # Prioritize high-score objects + if target_obj.score >= best_score: + # IoU Calculation + iou: float = \ + self._calculate_iou( + base_obj=base_obj, + target_obj=target_obj, + ) + # Adopt object with highest IoU + if iou > best_iou: + most_relevant_obj = target_obj + best_iou = iou + # Calculate the Euclidean distance between the center coordinates + # of the base and the center coordinates of the target + 
best_distance = distance + best_score = target_obj.score + elif iou > 0.0 and iou == best_iou: + # Calculate the Euclidean distance between the center coordinates + # of the base and the center coordinates of the target + if distance < best_distance: + most_relevant_obj = target_obj + best_distance = distance + best_score = target_obj.score + if most_relevant_obj: + if most_relevant_obj.classid == 1: + base_obj.generation = 0 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 2: + base_obj.generation = 1 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 3: + base_obj.gender = 0 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 4: + base_obj.gender = 1 + most_relevant_obj.is_used = True + + elif most_relevant_obj.classid == 8: + base_obj.head_pose = 0 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 9: + base_obj.head_pose = 1 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 10: + base_obj.head_pose = 2 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 11: + base_obj.head_pose = 3 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 12: + base_obj.head_pose = 4 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 13: + base_obj.head_pose = 5 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 14: + base_obj.head_pose = 6 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 15: + base_obj.head_pose = 7 + most_relevant_obj.is_used = True + + elif most_relevant_obj.classid == 27: + base_obj.handedness = 0 + most_relevant_obj.is_used = True + elif most_relevant_obj.classid == 28: + base_obj.handedness = 1 + most_relevant_obj.is_used = True + + def _nms( + self, + *, + target_objs: List[Box], + iou_threshold: float, + ): + filtered_objs: List[Box] = [] + + # 1. Sorted in order of highest score + # key=lambda box: box.score to get the score, and reverse=True to sort in descending order + sorted_objs = sorted(target_objs, key=lambda box: box.score, reverse=True) + + # 2. Scan the box list after sorting + while sorted_objs: + # Extract the first (highest score) + current_box = sorted_objs.pop(0) + + # If you have already used it, skip it + if current_box.is_used: + continue + + # Add to filtered_objs and set the use flag + filtered_objs.append(current_box) + current_box.is_used = True + + # 3. Mark the boxes where the current_box and IOU are above the threshold as used or exclude them + remaining_boxes = [] + for box in sorted_objs: + if not box.is_used: + # Calculating IoU + iou_value = self._calculate_iou(base_obj=current_box, target_obj=box) + + # If the IOU threshold is exceeded, it is considered to be the same object and is removed as a duplicate + if iou_value >= iou_threshold: + # Leave as used (exclude later) + box.is_used = True + else: + # If the IOU threshold is not met, the candidate is still retained + remaining_boxes.append(box) + + # Only the remaining_boxes will be handled in the next loop + sorted_objs = remaining_boxes + + # 4. 
Return the box that is left over in the end + return filtered_objs + + def _calculate_iou( + self, + *, + base_obj: Box, + target_obj: Box, + ) -> float: + # Calculate areas of overlap + inter_xmin = max(base_obj.x1, target_obj.x1) + inter_ymin = max(base_obj.y1, target_obj.y1) + inter_xmax = min(base_obj.x2, target_obj.x2) + inter_ymax = min(base_obj.y2, target_obj.y2) + # If there is no overlap + if inter_xmax <= inter_xmin or inter_ymax <= inter_ymin: + return 0.0 + # Calculate area of overlap and area of each bounding box + inter_area = (inter_xmax - inter_xmin) * (inter_ymax - inter_ymin) + area1 = (base_obj.x2 - base_obj.x1) * (base_obj.y2 - base_obj.y1) + area2 = (target_obj.x2 - target_obj.x1) * (target_obj.y2 - target_obj.y1) + # Calculate IoU + iou = inter_area / float(area1 + area2 - inter_area) + return iou + +class HSC(AbstractModel): + def __init__( + self, + *, + runtime: Optional[str] = 'onnx', + model_path: Optional[str] = 'hsc_l_48x48.onnx', + providers: Optional[List] = None, + ): + super().__init__( + runtime=runtime, + model_path=model_path, + obj_class_score_th=0.0, + attr_class_score_th=0.0, + keypoint_th=0.0, + providers=providers, + ) + self._input_height, self._input_width = self._resolve_input_size() + + def _resolve_input_size(self) -> Tuple[int, int]: + default_height, default_width = 32, 32 + input_shape: Optional[List[int]] = None + if self._runtime == 'onnx': + input_shape = list(self._interpreter.get_inputs()[0].shape) + elif self._input_details: + input_shape = self._input_details[0].get('shape') + if input_shape is not None: + input_shape = list(input_shape) + + def _safe_dim(value: Any, default: int) -> int: + try: + if value is None: + return default + return int(value) + except (TypeError, ValueError): + return default + + if not input_shape or len(input_shape) <= max(self._h_index, self._w_index): + return default_height, default_width + + height = _safe_dim(input_shape[self._h_index], default_height) + width = _safe_dim(input_shape[self._w_index], default_width) + return height, width + + def __call__(self, image: np.ndarray) -> float: + if image is None or image.size == 0: + raise ValueError('Input image for HSC is empty.') + resized_image = self._preprocess(image=image) + inference_image = np.asarray([resized_image], dtype=self._input_dtypes[0]) + outputs = super().__call__(input_datas=[inference_image]) + prob_smiling = float(np.squeeze(outputs[0])) + return float(np.clip(prob_smiling, 0.0, 1.0)) + + def _preprocess( + self, + image: np.ndarray, + swap: Optional[Tuple[int, int, int]] = None, + ) -> np.ndarray: + height = self._input_height + width = self._input_width + if height <= 0 or width <= 0: + raise ValueError('Invalid target size for HSC preprocessing.') + resized = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR) + resized = resized.astype(np.float32) / 255.0 + resized = resized.transpose(self._swap) + return np.ascontiguousarray(resized, dtype=np.float32) + + def _postprocess( + self, + *, + image: np.ndarray, + boxes: np.ndarray, + ) -> List[Box]: + return [] + +def list_image_files(dir_path: str) -> List[str]: + path = Path(dir_path) + image_files = [] + for extension in ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']: + image_files.extend(path.rglob(extension)) + return sorted([str(file) for file in image_files]) + +def crop_image_with_margin( + image: np.ndarray, + box: Box, + *, + margin_top: int, + margin_bottom: int, + margin_left: int, + margin_right: int, +) -> Optional[np.ndarray]: + """Extracts a 
region with the specified pixel margins.""" + if image is None or image.size == 0: + return None + h, w = image.shape[:2] + x1 = max(int(box.x1) - margin_left, 0) + y1 = max(int(box.y1) - margin_top, 0) + x2 = min(int(box.x2) + margin_right, w) + y2 = min(int(box.y2) + margin_bottom, h) + if x2 <= x1 or y2 <= y1: + return None + return image[y1:y2, x1:x2].copy() + +def is_parsable_to_int(s): + try: + int(s) + return True + except ValueError: + return False + +def is_package_installed(package_name: str): + """Checks if the specified package is installed. + + Parameters + ---------- + package_name: str + Name of the package to be checked. + + Returns + ------- + result: bool + True if the package is installed, false otherwise. + """ + return importlib.util.find_spec(package_name) is not None + +def draw_dashed_line( + image: np.ndarray, + pt1: Tuple[int, int], + pt2: Tuple[int, int], + color: Tuple[int, int, int], + thickness: int = 1, + dash_length: int = 10, +): + """Function to draw a dashed line""" + dist = ((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) ** 0.5 + dashes = int(dist / dash_length) + for i in range(dashes): + start = [int(pt1[0] + (pt2[0] - pt1[0]) * i / dashes), int(pt1[1] + (pt2[1] - pt1[1]) * i / dashes)] + end = [int(pt1[0] + (pt2[0] - pt1[0]) * (i + 0.5) / dashes), int(pt1[1] + (pt2[1] - pt1[1]) * (i + 0.5) / dashes)] + cv2.line(image, tuple(start), tuple(end), color, thickness) + +def draw_dashed_rectangle( + image: np.ndarray, + top_left: Tuple[int, int], + bottom_right: Tuple[int, int], + color: Tuple[int, int, int], + thickness: int = 1, + dash_length: int = 10 +): + """Function to draw a dashed rectangle""" + tl_tr = (bottom_right[0], top_left[1]) + bl_br = (top_left[0], bottom_right[1]) + draw_dashed_line(image, top_left, tl_tr, color, thickness, dash_length) + draw_dashed_line(image, tl_tr, bottom_right, color, thickness, dash_length) + draw_dashed_line(image, bottom_right, bl_br, color, thickness, dash_length) + draw_dashed_line(image, bl_br, top_left, color, thickness, dash_length) + +def distance_euclid(p1: Tuple[int,int], p2: Tuple[int,int]) -> float: + """2点 (x1, y1), (x2, y2) のユークリッド距離を返す""" + return math.hypot(p1[0]-p2[0], p1[1]-p2[1]) + +def draw_skeleton( + image: np.ndarray, + boxes: List[Box], + color=(0,255,255), + max_dist_threshold=500.0 +): + """ + 与えられた boxes (各クラスIDの関節候補) を基に、EDGESで定義された親子を + 「もっとも近い距離のペアから順番に」接合していく。ただし、 + classid=0 (人物) のバウンディングボックス内にあるキーポイント同士のみを + 接続対象とする。 + """ + # ------------------------- + # 1) 人物ボックスに ID を付与する + # ------------------------- + person_boxes = [b for b in boxes if b.classid == 0] + for i, pbox in enumerate(person_boxes): + # 便宜上、Boxクラスに person_id 属性がないので動的に付与する例 + pbox.person_id = i + + # ------------------------------------------------- + # 2) キーポイントがどの人物ボックスに属するか判断して person_id を記録 + # (複数人のバウンディングボックスが重なっている場合は、 + # 先に見つかったものを採用、など適宜ルールを決める) + # ------------------------------------------------- + keypoint_ids = {21,22,23,24,25,29,30,31,32} + for box in boxes: + if box.classid in keypoint_ids: + box.person_id = -1 + for pbox in person_boxes: + if (pbox.x1 <= box.cx <= pbox.x2) and (pbox.y1 <= box.cy <= pbox.y2): + box.person_id = pbox.person_id + break + + # ------------------------- + # 3) クラスIDごとに仕分け + # ------------------------- + classid_to_boxes: Dict[int, List[Box]] = {} + for b in boxes: + classid_to_boxes.setdefault(b.classid, []).append(b) + + edge_counts = Counter(EDGES) + + # 結果のラインを入れる + lines_to_draw = [] + + # ユークリッド距離計算の簡易関数 + def distance_euclid(p1, p2): + import math + return 
math.hypot(p1[0]-p2[0], p1[1]-p2[1]) + + # 各 (pid, cid) ペアに対してグルーピング + for (pid, cid), repeat_count in edge_counts.items(): + parent_list = classid_to_boxes.get(pid, []) + child_list = classid_to_boxes.get(cid, []) + + if not parent_list or not child_list: + continue + + # 親クラスIDが21 or 29の時はEDGESに書かれている回数(=repeat_count)だけマッチ可 + # それ以外は1回だけ + for_parent = repeat_count if (pid in [21, 29]) else 1 + + parent_capacity = [for_parent]*len(parent_list) # 親ごとに繋げる上限 + + # 子は常に1回のみ + child_used = [False]*len(child_list) + + # 距離が小さいペアから順に確定していくために、全ペアの距離を計算 + pair_candidates = [] + for i, pbox in enumerate(parent_list): + for j, cbox in enumerate(child_list): + # ここで "同じ person_id 同士であること" をチェック + if (pbox.person_id is not None) and (cbox.person_id is not None) and (pbox.person_id == cbox.person_id): + + dist = distance_euclid((pbox.cx, pbox.cy), (cbox.cx, cbox.cy)) + if dist <= max_dist_threshold: + pair_candidates.append((dist, i, j)) + + # 距離の小さい順に並べ替え + pair_candidates.sort(key=lambda x: x[0]) + + # 貪欲に割り当て + for dist, i, j in pair_candidates: + if parent_capacity[i] > 0 and (not child_used[j]): + # 親iがまだマッチ可能 & 子jが未使用ならマッチ確定 + pbox = parent_list[i] + cbox = child_list[j] + + lines_to_draw.append(((pbox.cx, pbox.cy), (cbox.cx, cbox.cy))) + parent_capacity[i] -= 1 + child_used[j] = True + + # ------------------------- + # 4) ラインを描画 + # ------------------------- + for (pt1, pt2) in lines_to_draw: + cv2.line(image, pt1, pt2, color, thickness=2) + +def main(): + parser = ArgumentParser() + + def check_positive(value): + ivalue = int(value) + if ivalue < 2: + raise ArgumentTypeError(f"Invalid Value: {ivalue}. Please specify an integer of 2 or greater.") + return ivalue + + parser.add_argument( + '-m', + '--model', + type=str, + default='deimv2_dinov3_x_wholebody34_680query_n_batch_640x640.onnx', + help='ONNX/TFLite file path for DEIMv2.', + ) + parser.add_argument( + '-hm', + '--hsc_model', + type=str, + default='hsc_l_48x48.onnx', + help='ONNX file path for the HSC smiling classifier.', + ) + group_v_or_i = parser.add_mutually_exclusive_group(required=True) + group_v_or_i.add_argument( + '-v', + '--video', + type=str, + help='Video file path or camera index.', + ) + group_v_or_i.add_argument( + '-i', + '--images_dir', + type=str, + help='jpg, png images folder path.', + ) + parser.add_argument( + '-ep', + '--execution_provider', + type=str, + choices=['cpu', 'cuda', 'tensorrt'], + default='cpu', + help='Execution provider for ONNXRuntime.', + ) + parser.add_argument( + '-it', + '--inference_type', + type=str, + choices=['fp16', 'int8'], + default='fp16', + help='Inference type. Default: fp16', + ) + parser.add_argument( + '-dvw', + '--disable_video_writer', + action='store_true', + help=\ + 'Disable video writer. '+ + 'Eliminates the file I/O load associated with automatic recording to MP4. '+ + 'Devices that use a MicroSD card or similar for main storage can speed up overall processing.', + ) + parser.add_argument( + '-dwk', + '--disable_waitKey', + action='store_true', + help=\ + 'Disable cv2.waitKey(). '+ + 'When you want to process a batch of still images, '+ + ' disable key-input wait and process them continuously.', + ) + parser.add_argument( + '-ost', + '--object_socre_threshold', + type=float, + default=0.35, + help=\ + 'The detection score threshold for object detection. Default: 0.35', + ) + parser.add_argument( + '-ast', + '--attribute_socre_threshold', + type=float, + default=0.70, + help=\ + 'The attribute score threshold for object detection. 
Default: 0.70', + ) + parser.add_argument( + '-kst', + '--keypoint_threshold', + type=float, + default=0.30, + help=\ + 'The keypoint score threshold for object detection. Default: 0.30', + ) + parser.add_argument( + '--body-long-history-size', + dest='body_long_history_size', + type=check_positive, + default=BODY_LONG_HISTORY_SIZE, + help=\ + f'History length N for bbalg long tracking buffer. Default: {BODY_LONG_HISTORY_SIZE}', + ) + parser.add_argument( + '--body-short-history-size', + dest='body_short_history_size', + type=check_positive, + default=BODY_SHORT_HISTORY_SIZE, + help=\ + f'History length M for bbalg short tracking buffer. Default: {BODY_SHORT_HISTORY_SIZE}', + ) + parser.add_argument( + '-kdm', + '--keypoint_drawing_mode', + type=str, + choices=['dot', 'box', 'both'], + default='dot', + help='Key Point Drawing Mode. Default: dot', + ) + parser.add_argument( + '-ebm', + '--enable_bone_drawing_mode', + action='store_true', + help=\ + 'Enable bone drawing mode. (Press B on the keyboard to switch modes)', + ) + parser.add_argument( + '-dnm', + '--disable_generation_identification_mode', + action='store_true', + help=\ + 'Disable generation identification mode. (Press N on the keyboard to switch modes)', + ) + parser.add_argument( + '-dgm', + '--disable_gender_identification_mode', + action='store_true', + help=\ + 'Disable gender identification mode. (Press G on the keyboard to switch modes)', + ) + parser.add_argument( + '-dlr', + '--disable_left_and_right_hand_identification_mode', + action='store_true', + help=\ + 'Disable left and right hand identification mode. (Press H on the keyboard to switch modes)', + ) + parser.add_argument( + '-dhm', + '--disable_headpose_identification_mode', + action='store_true', + help=\ + 'Disable HeadPose identification mode. (Press P on the keyboard to switch modes)', + ) + parser.add_argument( + '-drc', + '--disable_render_classids', + type=int, + nargs="*", + default=[], + help=\ + 'Class ID to disable bounding box drawing. List[int]. e.g. -drc 17 18 19', + ) + parser.add_argument( + '-efm', + '--enable_face_mosaic', + action='store_true', + help=\ + 'Enable face mosaic.', + ) + parser.add_argument( + '-dtk', + '--disable_tracking', + action='store_true', + help=\ + 'Disable instance tracking. (Press R on the keyboard to switch modes)', + ) + parser.add_argument( + '-dti', + '--disable_trackid_overlay', + action='store_true', + help=\ + 'Disable TrackID overlay. (Press T on the keyboard to switch modes)', + ) + parser.add_argument( + '-dhd', + '--disable_head_distance_measurement', + action='store_true', + help=\ + 'Disable Head distance measurement. (Press M on the keyboard to switch modes)', + ) + parser.add_argument( + '-oyt', + '--output_yolo_format_text', + action='store_true', + help=\ + 'Output YOLO format texts and images.', + ) + parser.add_argument( + '-bblw', + '--bounding_box_line_width', + type=check_positive, + default=2, + help=\ + 'Bounding box line width. Default: 2', + ) + parser.add_argument( + '-chf', + '--camera_horizontal_fov', + type=int, + default=90, + help=\ + 'Camera horizontal FOV. 
Default: 90', + ) + args = parser.parse_args() + body_long_history_size = args.body_long_history_size + body_short_history_size = args.body_short_history_size + + # runtime check + model_file: str = args.model + model_dir_path = os.path.dirname(os.path.abspath(model_file)) + model_ext: str = os.path.splitext(model_file)[1][1:].lower() + runtime: str = None + if model_ext == 'onnx': + if not is_package_installed('onnxruntime'): + print(Color.RED('ERROR: onnxruntime is not installed. pip install onnxruntime or pip install onnxruntime-gpu')) + sys.exit(0) + runtime = 'onnx' + elif model_ext == 'tflite': + if is_package_installed('ai_edge_litert'): + runtime = 'ai_edge_litert' + elif is_package_installed('tensorflow'): + runtime = 'tensorflow' + else: + print(Color.RED('ERROR: ai_edge_litert or tensorflow is not installed.')) + sys.exit(0) + video: str = args.video + images_dir: str = args.images_dir + disable_waitKey: bool = args.disable_waitKey + object_socre_threshold: float = args.object_socre_threshold + attribute_socre_threshold: float = args.attribute_socre_threshold + keypoint_threshold: float = args.keypoint_threshold + keypoint_drawing_mode: str = args.keypoint_drawing_mode + enable_bone_drawing_mode: bool = args.enable_bone_drawing_mode + disable_generation_identification_mode: bool = args.disable_generation_identification_mode + disable_gender_identification_mode: bool = args.disable_gender_identification_mode + disable_left_and_right_hand_identification_mode: bool = args.disable_left_and_right_hand_identification_mode + disable_headpose_identification_mode: bool = args.disable_headpose_identification_mode + disable_render_classids: List[int] = args.disable_render_classids + enable_face_mosaic: bool = args.enable_face_mosaic + enable_tracking: bool = not args.disable_tracking + enable_trackid_overlay: bool = not args.disable_trackid_overlay + enable_head_distance_measurement: bool = not args.disable_head_distance_measurement + output_yolo_format_text: bool = args.output_yolo_format_text + execution_provider: str = args.execution_provider + inference_type: str = args.inference_type + inference_type = inference_type.lower() + bounding_box_line_width: int = args.bounding_box_line_width + camera_horizontal_fov: int = args.camera_horizontal_fov + hsc_model_file: str = args.hsc_model + providers: List[Tuple[str, Dict] | str] = None + + if execution_provider == 'cpu': + providers = [ + 'CPUExecutionProvider', + ] + elif execution_provider == 'cuda': + providers = [ + 'CUDAExecutionProvider', + 'CPUExecutionProvider', + ] + elif execution_provider == 'tensorrt': + ep_type_params = {} + if inference_type == 'fp16': + ep_type_params = \ + { + "trt_fp16_enable": True, + } + elif inference_type == 'int8': + ep_type_params = \ + { + "trt_fp16_enable": True, + "trt_int8_enable": True, + "trt_int8_calibration_table_name": "calibration.flatbuffers", + } + else: + ep_type_params = \ + { + "trt_fp16_enable": True, + } + providers = [ + ( + "TensorrtExecutionProvider", + { + 'trt_engine_cache_enable': True, # .engine, .profile export + 'trt_engine_cache_path': f'{model_dir_path}', + # 'trt_max_workspace_size': 4e9, # Maximum workspace size for TensorRT engine (1e9 ≈ 1GB) + # onnxruntime>=1.21.0 breaking changes + # https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#data-dependant-shape-dds-ops + # https://github.com/microsoft/onnxruntime/pull/22681/files + # https://github.com/microsoft/onnxruntime/pull/23893/files + 'trt_op_types_to_exclude': 
'NonMaxSuppression,NonZero,RoiAlign', + } | ep_type_params, + ), + "CUDAExecutionProvider", + 'CPUExecutionProvider', + ] + + print(Color.GREEN('Provider parameters:')) + pprint(providers) + + # Model initialization + model = DEIMv2( + runtime=runtime, + model_path=model_file, + obj_class_score_th=object_socre_threshold, + attr_class_score_th=attribute_socre_threshold, + keypoint_th=keypoint_threshold, + providers=providers, + ) + hsc_classifier = HSC( + runtime='onnx', + model_path=hsc_model_file, + providers=providers, + ) + use_head_crops = Path(hsc_model_file).name == 'hsc_l_48x48.onnx' + + file_paths: List[str] = None + cap = None + video_writer = None + if images_dir is not None: + file_paths = list_image_files(dir_path=images_dir) + else: + cap = cv2.VideoCapture( + int(video) if is_parsable_to_int(video) else video + ) + disable_video_writer: bool = args.disable_video_writer + if not disable_video_writer: + cap_fps = cap.get(cv2.CAP_PROP_FPS) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fourcc = cv2.VideoWriter.fourcc(*'mp4v') + video_writer = cv2.VideoWriter( + filename='output.mp4', + fourcc=fourcc, + fps=cap_fps, + frameSize=(w, h), + ) + + file_paths_count = -1 + movie_frame_count = 0 + white_line_width = bounding_box_line_width + colored_line_width = white_line_width - 1 + tracker = SimpleSortTracker() + sitting_tracker = SimpleSortTracker() + head_tracker = SimpleSortTracker() + track_color_cache: Dict[int, np.ndarray] = {} + state_histories: Dict[int, BodyStateHistory] = {} + def get_state_history(track_id: int) -> BodyStateHistory: + history = state_histories.get(track_id) + if history is None: + history = BodyStateHistory(body_long_history_size, body_short_history_size) + state_histories[track_id] = history + return history + tracking_enabled_prev = enable_tracking + while True: + image: np.ndarray = None + if file_paths is not None: + file_paths_count += 1 + if file_paths_count <= len(file_paths) - 1: + image = cv2.imread(file_paths[file_paths_count]) + else: + break + else: + res, image = cap.read() + if not res: + break + movie_frame_count += 1 + + debug_image = copy.deepcopy(image) + debug_image_h = debug_image.shape[0] + debug_image_w = debug_image.shape[1] + + start_time = time.perf_counter() + boxes = model( + image=debug_image, + disable_generation_identification_mode=disable_generation_identification_mode, + disable_gender_identification_mode=disable_gender_identification_mode, + disable_left_and_right_hand_identification_mode=disable_left_and_right_hand_identification_mode, + disable_headpose_identification_mode=disable_headpose_identification_mode, + ) + elapsed_time = time.perf_counter() - start_time + body_boxes = [box for box in boxes if box.classid == 0] + head_boxes = [box for box in boxes if box.classid == 7] + target_boxes = head_boxes if use_head_crops else body_boxes + for box in target_boxes: + crop = crop_image_with_margin( + image=image, + box=box, + margin_top=0, + margin_bottom=0, + margin_left=0, + margin_right=0, + ) + if crop is None or crop.size == 0: + continue + rgb_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB) + try: + prob_smiling = hsc_classifier(image=rgb_crop) + except Exception: + continue + box.head_prob_smiling = prob_smiling + box.head_state = 1 if prob_smiling >= 0.50 else 0 + + sitting_tracker.update(body_boxes) + head_tracker.update(head_boxes) + + state_boxes = target_boxes + state_tracker = head_tracker if use_head_crops else sitting_tracker + matched_state_track_ids: set[int] = 
set() + for state_box in state_boxes: + if state_box.track_id <= 0: + continue + matched_state_track_ids.add(state_box.track_id) + history = get_state_history(state_box.track_id) + detection_state = bool(state_box.head_state == 1) + history.append(detection_state) + ( + state_interval_judgment, + state_start_judgment, + state_end_judgment, + ) = state_verdict( + long_tracking_history=history.long_history, + short_tracking_history=history.short_history, + ) + history.interval_active = state_interval_judgment + if state_interval_judgment: + history.label = SMILING_LABEL + elif state_end_judgment: + history.label = '' + state_box.head_label = history.label + state_box.head_state = 1 if history.interval_active else 0 + + current_state_track_ids = {track['id'] for track in state_tracker.tracks} + unmatched_state_track_ids = current_state_track_ids - matched_state_track_ids + for track_id in unmatched_state_track_ids: + history = get_state_history(track_id) + history.append(False) + ( + state_interval_judgment, + state_start_judgment, + state_end_judgment, + ) = state_verdict( + long_tracking_history=history.long_history, + short_tracking_history=history.short_history, + ) + history.interval_active = state_interval_judgment + if state_interval_judgment: + history.label = SMILING_LABEL + elif state_end_judgment: + history.label = '' + + stale_history_ids = [track_id for track_id in list(state_histories.keys()) if track_id not in current_state_track_ids] + for track_id in stale_history_ids: + state_histories.pop(track_id, None) + + if file_paths is None: + cv2.putText(debug_image, f'{elapsed_time*1000:.2f} ms', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA) + cv2.putText(debug_image, f'{elapsed_time*1000:.2f} ms', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA) + + body_boxes = [box for box in boxes if box.classid == 0] + current_tracking_enabled = enable_tracking + if current_tracking_enabled: + if not tracking_enabled_prev: + tracker = SimpleSortTracker() + track_color_cache.clear() + tracker.update(body_boxes) + active_track_ids = {track['id'] for track in tracker.tracks} + stale_ids = [tid for tid in track_color_cache.keys() if tid not in active_track_ids] + for tid in stale_ids: + track_color_cache.pop(tid, None) + else: + if tracking_enabled_prev: + tracker = SimpleSortTracker() + track_color_cache.clear() + for box in boxes: + box.track_id = -1 + tracking_enabled_prev = current_tracking_enabled + + # Draw bounding boxes + for box in boxes: + classid: int = box.classid + color = (255,255,255) + + if classid in disable_render_classids: + continue + + if classid == 0: + # Body + if not disable_gender_identification_mode: + # Body + if box.gender == 0: + # Male + color = (255,0,0) + elif box.gender == 1: + # Female + color = (139,116,225) + else: + # Unknown + color = (0,200,255) if box.head_state == 1 else (0,0,255) + else: + # Body + color = (0,200,255) if box.head_state == 1 else (0,0,255) + elif classid == 5: + # Body-With-Wheelchair + color = (0,200,255) + elif classid == 6: + # Body-With-Crutches + color = (83,36,179) + elif classid == 7: + # Head + if not disable_headpose_identification_mode: + color = BOX_COLORS[box.head_pose][0] if box.head_pose != -1 else (216,67,21) + else: + color = (0,0,255) + if box.head_label: + color = SMILING_COLOR + elif classid == 16: + # Face + color = (0,200,255) + elif classid == 17: + # Eye + color = (255,0,0) + elif classid == 18: + # Nose + color = (0,255,0) + elif classid == 19: + # Mouth + color = 
(255,0,0) + elif classid == 20: + # Ear + color = (203,192,255) + + elif classid == 21: + # Collarbone + color = (0,0,255) + elif classid == 22: + # Shoulder + color = (255,0,0) + elif classid == 23: + # Solar_plexus + color = (252,189,107) + elif classid == 24: + # Elbow + color = (0,255,0) + elif classid == 25: + # Wrist + color = (0,0,255) + elif classid == 26: + # Hand + color = (0,255,0) + + elif classid == 29: + # abdomen + color = (0,0,255) + elif classid == 30: + # hip_joint + color = (255,0,0) + elif classid == 31: + # Knee + color = (0,0,255) + elif classid == 32: + # ankle + color = (255,0,0) + + elif classid == 33: + # Foot + color = (250,0,136) + + if (classid == 0 and not disable_gender_identification_mode) \ + or (classid == 7 and not disable_headpose_identification_mode) \ + or (classid == 26 and not disable_left_and_right_hand_identification_mode) \ + or classid == 16 \ + or classid in [21,22,23,24,25,29,30,31,32]: + + # Body + if classid == 0: + if box.gender == -1: + draw_dashed_rectangle( + image=debug_image, + top_left=(box.x1, box.y1), + bottom_right=(box.x2, box.y2), + color=color, + thickness=2, + dash_length=10 + ) + else: + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), white_line_width) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, colored_line_width) + + # Head + elif classid == 7: + if box.head_pose == -1: + draw_dashed_rectangle( + image=debug_image, + top_left=(box.x1, box.y1), + bottom_right=(box.x2, box.y2), + color=color, + thickness=2, + dash_length=10 + ) + else: + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), white_line_width) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, colored_line_width) + + # Face + elif classid == 16: + if enable_face_mosaic: + w = int(abs(box.x2 - box.x1)) + h = int(abs(box.y2 - box.y1)) + small_box = cv2.resize(debug_image[box.y1:box.y2, box.x1:box.x2, :], (3,3)) + normal_box = cv2.resize(small_box, (w,h)) + if normal_box.shape[0] != abs(box.y2 - box.y1) \ + or normal_box.shape[1] != abs(box.x2 - box.x1): + normal_box = cv2.resize(small_box, (abs(box.x2 - box.x1), abs(box.y2 - box.y1))) + debug_image[box.y1:box.y2, box.x1:box.x2, :] = normal_box + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), white_line_width) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, colored_line_width) + + # Hands + elif classid == 26: + if box.handedness == -1: + draw_dashed_rectangle( + image=debug_image, + top_left=(box.x1, box.y1), + bottom_right=(box.x2, box.y2), + color=color, + thickness=2, + dash_length=10 + ) + else: + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), white_line_width) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, colored_line_width) + + # Shoulder, Elbow, Knee + elif classid in [21,22,23,24,25,29,30,31,32]: + if keypoint_drawing_mode in ['dot', 'both']: + cv2.circle(debug_image, (box.cx, box.cy), 4, (255,255,255), -1) + cv2.circle(debug_image, (box.cx, box.cy), 3, color, -1) + if keypoint_drawing_mode in ['box', 'both']: + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), 2) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, 1) + + else: + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), white_line_width) + cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, colored_line_width) + + # TrackID text + if 
+            # TrackID text
+            if enable_trackid_overlay and classid in (0, 7) and box.track_id > 0:
+                track_text = f'ID: {box.track_id}'
+                text_x = max(box.x1 - 5, 0)
+                text_y = box.y1 - 30
+                if text_y < 20:
+                    text_y = min(box.y2 + 25, debug_image_h - 10)
+                cached_color = track_color_cache.get(box.track_id)
+                if isinstance(cached_color, np.ndarray):
+                    text_color = tuple(int(np.clip(v, 0, 255)) for v in cached_color.tolist())
+                else:
+                    text_color = color if isinstance(color, tuple) else (0, 200, 255)
+                cv2.putText(
+                    debug_image,
+                    track_text,
+                    (text_x, text_y),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.6,
+                    (10, 10, 10),
+                    2,
+                    cv2.LINE_AA,
+                )
+                cv2.putText(
+                    debug_image,
+                    track_text,
+                    (text_x, text_y),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.6,
+                    text_color,
+                    1,
+                    cv2.LINE_AA,
+                )
+
+            # Attributes text
+            generation_txt = ''
+            if box.generation == -1:
+                generation_txt = ''
+            elif box.generation == 0:
+                generation_txt = 'Adult'
+            elif box.generation == 1:
+                generation_txt = 'Child'
+
+            gender_txt = ''
+            if box.gender == -1:
+                gender_txt = ''
+            elif box.gender == 0:
+                gender_txt = 'M'
+            elif box.gender == 1:
+                gender_txt = 'F'
+
+            attr_txt = f'{generation_txt}({gender_txt})' if gender_txt != '' else f'{generation_txt}'
+
+            headpose_txt = BOX_COLORS[box.head_pose][1] if box.head_pose != -1 else ''
+            attr_txt = f'{attr_txt} {headpose_txt}' if headpose_txt != '' else f'{attr_txt}'
+            smiling_label_active = classid in (0, 7) and bool(box.head_label)
+            if classid in (0, 7):
+                if box.head_label or (box.head_prob_smiling is not None and box.head_prob_smiling >= 0.0):
+                    attr_txt = f'{box.head_label} {box.head_prob_smiling:.3f}' if box.head_label else f'{box.head_prob_smiling:.3f}'
+                else:
+                    attr_txt = ''
+
+            attr_color = SMILING_COLOR if smiling_label_active else color
+            if attr_txt == '':
+                continue
+            cv2.putText(
+                debug_image,
+                f'{attr_txt}',
+                (
+                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+                    box.y1-10 if box.y1-25 > 0 else 20
+                ),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                (255, 255, 255),
+                2,
+                cv2.LINE_AA,
+            )
+            cv2.putText(
+                debug_image,
+                f'{attr_txt}',
+                (
+                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+                    box.y1-10 if box.y1-25 > 0 else 20
+                ),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                attr_color,
+                1,
+                cv2.LINE_AA,
+            )
+
+            handedness_txt = ''
+            if box.handedness == -1:
+                handedness_txt = ''
+            elif box.handedness == 0:
+                handedness_txt = 'L'
+            elif box.handedness == 1:
+                handedness_txt = 'R'
+            cv2.putText(
+                debug_image,
+                f'{handedness_txt}',
+                (
+                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+                    box.y1-10 if box.y1-25 > 0 else 20
+                ),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                (255, 255, 255),
+                2,
+                cv2.LINE_AA,
+            )
+            cv2.putText(
+                debug_image,
+                f'{handedness_txt}',
+                (
+                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+                    box.y1-10 if box.y1-25 > 0 else 20
+                ),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7,
+                color,
+                1,
+                cv2.LINE_AA,
+            )
+
+            # Head distance
+            if enable_head_distance_measurement and classid == 7:
+                focalLength: float = 0.0
+                if (camera_horizontal_fov > 90):
+                    # Fisheye Camera (Equidistant Model)
+                    focalLength = debug_image_w / (camera_horizontal_fov * (math.pi / 180))
+                else:
+                    # Normal camera (Pinhole Model)
+                    focalLength = debug_image_w / (2 * math.tan((camera_horizontal_fov / 2) * (math.pi / 180)))
+                # Meters
+                distance = (AVERAGE_HEAD_WIDTH * focalLength) / abs(box.x2 - box.x1)
+
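+                # Rough monocular estimate: with the pinhole branch above,
+                # focal_px = image_width / (2 * tan(h_fov / 2)); e.g. a 640 px wide frame
+                # and a 90-degree horizontal FOV give focal_px = 640 / (2 * tan(45 deg)) = 320 px.
+                # If AVERAGE_HEAD_WIDTH were 0.16 m and the head box 80 px wide, the
+                # estimate would be 0.16 * 320 / 80 = 0.64 m (illustrative numbers only).
+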
+                cv2.putText(
+                    debug_image,
+                    f'{distance:.3f} m',
+                    (
+                        box.x1+5 if box.x1 < debug_image_w else debug_image_w-50,
+                        box.y1+20 if box.y1-5 > 0 else 20
+                    ),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7,
+                    (255, 255, 255),
+                    2,
+                    cv2.LINE_AA,
+                )
+                cv2.putText(
+                    debug_image,
+                    f'{distance:.3f} m',
+                    (
+                        box.x1+5 if box.x1 < debug_image_w else debug_image_w-50,
+                        box.y1+20 if box.y1-15 > 0 else 20
+                    ),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7,
+                    (10, 10, 10),
+                    1,
+                    cv2.LINE_AA,
+                )
+
+            # cv2.putText(
+            #     debug_image,
+            #     f'{box.score:.2f}',
+            #     (
+            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+            #         box.y1-10 if box.y1-25 > 0 else 20
+            #     ),
+            #     cv2.FONT_HERSHEY_SIMPLEX,
+            #     0.7,
+            #     (255, 255, 255),
+            #     2,
+            #     cv2.LINE_AA,
+            # )
+            # cv2.putText(
+            #     debug_image,
+            #     f'{box.score:.2f}',
+            #     (
+            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
+            #         box.y1-10 if box.y1-25 > 0 else 20
+            #     ),
+            #     cv2.FONT_HERSHEY_SIMPLEX,
+            #     0.7,
+            #     color,
+            #     1,
+            #     cv2.LINE_AA,
+            # )
+
+        # Draw skeleton
+        if enable_bone_drawing_mode:
+            draw_skeleton(image=debug_image, boxes=boxes, color=(0, 255, 255), max_dist_threshold=300)
+
+        if file_paths is not None:
+            basename = os.path.basename(file_paths[file_paths_count])
+            os.makedirs('output', exist_ok=True)
+            cv2.imwrite(f'output/{basename}', debug_image)
+
+        if file_paths is not None and output_yolo_format_text:
+            os.makedirs('output', exist_ok=True)
+            cv2.imwrite(f'output/{os.path.splitext(os.path.basename(file_paths[file_paths_count]))[0]}.png', image)
+            cv2.imwrite(f'output/{os.path.splitext(os.path.basename(file_paths[file_paths_count]))[0]}_i.png', image)
+            cv2.imwrite(f'output/{os.path.splitext(os.path.basename(file_paths[file_paths_count]))[0]}_o.png', debug_image)
+            with open(f'output/{os.path.splitext(os.path.basename(file_paths[file_paths_count]))[0]}.txt', 'w') as f:
+                for box in boxes:
+                    classid = box.classid
+                    cx = box.cx / debug_image_w
+                    cy = box.cy / debug_image_h
+                    w = abs(box.x2 - box.x1) / debug_image_w
+                    h = abs(box.y2 - box.y1) / debug_image_h
+                    f.write(f'{classid} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n')
+        elif file_paths is None and output_yolo_format_text:
+            os.makedirs('output', exist_ok=True)
+            cv2.imwrite(f'output/{movie_frame_count:08d}.png', image)
+            cv2.imwrite(f'output/{movie_frame_count:08d}_i.png', image)
+            cv2.imwrite(f'output/{movie_frame_count:08d}_o.png', debug_image)
+            with open(f'output/{movie_frame_count:08d}.txt', 'w') as f:
+                for box in boxes:
+                    classid = box.classid
+                    cx = box.cx / debug_image_w
+                    cy = box.cy / debug_image_h
+                    w = abs(box.x2 - box.x1) / debug_image_w
+                    h = abs(box.y2 - box.y1) / debug_image_h
+                    f.write(f'{classid} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n')
+
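+        # The .txt files written above follow the YOLO label convention: one
+        # "classid cx cy w h" row per box, with cx/cy/w/h normalised to [0, 1].
+        # For example (illustrative numbers only), a 192x216 px box centred at
+        # (960, 540) in a 1920x1080 frame would be written as
+        # "0 0.500000 0.500000 0.100000 0.200000".
+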
+        if video_writer is not None:
+            video_writer.write(debug_image)
+            # video_writer.write(image)
+
+        cv2.imshow("test", debug_image)
+
+        key = cv2.waitKey(1) & 0xFF if file_paths is None or disable_waitKey else cv2.waitKey(0) & 0xFF
+        if key == ord('\x1b'): # 27, ESC
+            break
+        elif key == ord('b'): # 98, B, Bone drawing mode switch
+            enable_bone_drawing_mode = not enable_bone_drawing_mode
+        elif key == ord('n'): # 110, N, Generation mode switch
+            disable_generation_identification_mode = not disable_generation_identification_mode
+        elif key == ord('g'): # 103, G, Gender mode switch
+            disable_gender_identification_mode = not disable_gender_identification_mode
+        elif key == ord('p'): # 112, P, HeadPose mode switch
+            disable_headpose_identification_mode = not disable_headpose_identification_mode
+        elif key == ord('h'): # 104, H, HandsLR mode switch
+            disable_left_and_right_hand_identification_mode = not disable_left_and_right_hand_identification_mode
+        elif key == ord('k'): # 107, K, Keypoints mode switch
+            if keypoint_drawing_mode == 'dot':
+                keypoint_drawing_mode = 'box'
+            elif keypoint_drawing_mode == 'box':
+                keypoint_drawing_mode = 'both'
+            elif keypoint_drawing_mode == 'both':
+                keypoint_drawing_mode = 'dot'
+        elif key == ord('r'): # 114, R, Tracking mode switch
+            enable_tracking = not enable_tracking
+            if enable_tracking and not enable_trackid_overlay:
+                enable_trackid_overlay = True
+        elif key == ord('t'): # 116, T, TrackID overlay mode switch
+            enable_trackid_overlay = not enable_trackid_overlay
+            if not enable_tracking:
+                enable_trackid_overlay = False
+        elif key == ord('m'): # 109, M, Head distance measurement mode switch
+            enable_head_distance_measurement = not enable_head_distance_measurement
+
+    if video_writer is not None:
+        video_writer.release()
+
+    if cap is not None:
+        cap.release()
+
+    try:
+        cv2.destroyAllWindows()
+    except:
+        pass
+
+if __name__ == "__main__":
+    main()
diff --git a/481_WHC/download.sh b/481_WHC/download.sh
new file mode 100755
index 0000000000..eda93600d9
--- /dev/null
+++ b/481_WHC/download.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+curl "https://s3.ap-northeast-2.wasabisys.com/pinto-model-zoo/481_WHC/resources.tar.gz" -o resources.tar.gz
+tar -zxvf resources.tar.gz
+rm resources.tar.gz
+
+echo Download finished.
diff --git a/481_WHC/url.txt b/481_WHC/url.txt
new file mode 100644
index 0000000000..99ac715b1b
--- /dev/null
+++ b/481_WHC/url.txt
@@ -0,0 +1 @@
+https://github.com/PINTO0309/WHC
diff --git a/README.md b/README.md
index 67ae5ef3ba..3a913e63e7 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,7 @@ I have been working on quantization of various models as a hobby, but I have ski
 |478|SC|[■■■](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/478_SC)|||||||||||⚫|32x24, Sitting|
 |479|PUC|[■■■](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/479_PUC)|||||||||||⚫|32x24, Phone Usage Classifier|
 |480|HSC|[■■■](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/480_HSC)|||||||||||⚫|48x48, Happy smile classifier|
+|481|WHC|[■■■](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/481_WHC)|||||||||||⚫|4x32x32,6x32x32,8x32x32, Waving Hand Classification|
 ### 2. 2D Object Detection
 |No.|Model Name|Link|FP32|FP16|INT8|TPU|DQ|WQ|OV|CM|TFJS|TF-TRT|ONNX|Remarks|
 |:-|:-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-|