From ad2bff36cbf04996a50d8b32da341b7796b6878e Mon Sep 17 00:00:00 2001 From: Eugene Liu Date: Thu, 31 Oct 2024 17:29:08 +0000 Subject: [PATCH 1/5] =?UTF-8?q?=E2=9C=A8Add=20model=20descriptions=20and?= =?UTF-8?q?=20documentation=20for=20instance=20segmentation,=20detection,?= =?UTF-8?q?=20and=20keypoint=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/index.md | 27 +++++++++ .../python/descriptions/detection_model.md | 1 + docs/source/python/descriptions/index.md | 55 +++++++++++++++++++ .../descriptions/instance_segmentation.md | 45 +++++++++++++++ .../python/descriptions/keypoint_detection.md | 51 +++++++++++++++++ 5 files changed, 179 insertions(+) create mode 100644 docs/source/python/descriptions/detection_model.md create mode 100644 docs/source/python/descriptions/index.md create mode 100644 docs/source/python/descriptions/instance_segmentation.md create mode 100644 docs/source/python/descriptions/keypoint_detection.md diff --git a/docs/source/index.md b/docs/source/index.md index 98c0965f..29dc003c 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,5 +1,32 @@ # InferenceSDK Documentation +## Model Description + +::::{grid} 1 2 2 3 +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} Instance Segmentation +:link: ./python/descriptions/instance_segmentation +:link-type: doc +[TODO] +::: + +:::{grid-item-card} Detection +:link: ./python/descriptions/detection_model +:link-type: doc +[TODO] +::: + +:::{grid-item-card} Keypoint Detection +:link: ./python/descriptions/keypoint_detection +:link-type: doc +[TODO] +::: + + +:::: + ## Python API Reference ::::{grid} 1 2 2 3 diff --git a/docs/source/python/descriptions/detection_model.md b/docs/source/python/descriptions/detection_model.md new file mode 100644 index 00000000..643f11d0 --- /dev/null +++ b/docs/source/python/descriptions/detection_model.md @@ -0,0 +1 @@ +# Detection Model \ No newline at end of file diff --git 
a/docs/source/python/descriptions/index.md b/docs/source/python/descriptions/index.md new file mode 100644 index 00000000..fe4286eb --- /dev/null +++ b/docs/source/python/descriptions/index.md @@ -0,0 +1,55 @@ +# Model Descriptions + +::::{grid} 1 2 2 3 +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} Detection Model +:link: ./detection_model +:link-type: doc +[todo] +::: + +:::{grid-item-card} Anomaly +:link: ./anomaly +:link-type: doc +[todo] +::: + +:::{grid-item-card} Keypoint Detection +:link: ./keypoint_detection +:link-type: doc +[todo] +::: + +:::{grid-item-card} Visual Prompting +:link: ./visual_prompting +:link-type: doc +[todo] +::: + +:::{grid-item-card} Classification +:link: ./classification +:link-type: doc +[todo] +::: + +:::{grid-item-card} Segmentation +:link: ./segmentation +:link-type: doc +[todo] +::: + +:::{grid-item-card} Instance Segmentation +:link: ./instance_segmentation +:link-type: doc +[todo] +::: + +:::{grid-item-card} Action Classification +:link: ./action_classification +:link-type: doc +[todo] +::: + +:::: \ No newline at end of file diff --git a/docs/source/python/descriptions/instance_segmentation.md b/docs/source/python/descriptions/instance_segmentation.md new file mode 100644 index 00000000..8508f46c --- /dev/null +++ b/docs/source/python/descriptions/instance_segmentation.md @@ -0,0 +1,45 @@ +# Instance Segmentation + +## Description + +Instance segmentation model aims to detect and segment objects in an image. It is an extension of object detection, where each object is segmented into a separate mask. The model outputs a list of segmented objects, each containing a mask, bounding box, score and class label. + +## OpenVINO Model Specifications + +### Inputs + +A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 
+ +### Outputs + +Instance segmentation model outputs a list of segmented objects (i.e `list[SegmentedObject]`)wrapped in `InstanceSegmentationResult.segmentedObjects`, each containing the following attributes: + +- `mask` (numpy.ndarray) - A binary mask of the object. +- `score` (float) - Confidence score of the object. +- `id` (int) - Class label of the object. +- `str_label` (str) - String label of the object. +- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. +- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. +- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. +- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. + + +## Example + +```python +import cv2 +from model_api.models import MaskRCNNModel + +# Load the model +model = MaskRCNNModel.create_model("model.xml") + +# Forward pass +predictions = model(image) + +# Iterate over the segmented objects +for pred_obj in predictions.segmentedObjects: + pred_mask = pred_obj.mask + pred_score = pred_obj.score + label_id = pred_obj.id + bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] +``` diff --git a/docs/source/python/descriptions/keypoint_detection.md b/docs/source/python/descriptions/keypoint_detection.md new file mode 100644 index 00000000..c37cba07 --- /dev/null +++ b/docs/source/python/descriptions/keypoint_detection.md @@ -0,0 +1,51 @@ +# Keypoint Detection + +## Description + +Keypoint detection model aims to detect a set of pre-defined keypoints on a cropped object. +If a crop is not tight enough, quality of keypoints degrades. Having this model and an +object detector, one can organize keypoint detection for all objects of interest presented on an image (top-down approach). + +## Models + +Top-down keypoint detection pipeline uses detections that come from any appropriate detector, +and a keypoints regression model acting on crops. 
+ +### Parameters + +The following parameters can be provided via python API or RT Info embedded into OV model: + +- `labels`(`list(str)`) : a list of keypoints names. + +## OpenVINO Model Specifications + +### Inputs + +A single `NCHW` tensor representing a batch of images. + +### Outputs + +Two vectors in Simple Coordinate Classification Perspective ([SimCC](https://arxiv.org/abs/2107.03332)) format: + +- `pred_x` (B, N, D1) - `x` coordinate representation, where `N` is the number of keypoints. +- `pred_y` (B, N, D2) - `y` coordinate representation, where `N` is the number of keypoints. + +## Example + +```python +import cv2 +from model_api.models import TopDownKeypointDetectionPipeline, Detection, KeypointDetectionModel + +model = KeypointDetectionModel.create_model("kp_model.xml") +# a list of detections in (x_min, y_min, x_max, y_max, score, class_id) format +detections = [Detection(0, 0, 100, 100, 1.0, 0)] +top_down_pipeline = TopDownKeypointDetectionPipeline(model) +predictions = top_down_detector.predict(image, detections) + +# iterating over a list of DetectedKeypoints. 
Each of the items corresponds to a detection +for obj_keypoints in predictions: + for point in obj_keypoints.keypoints.astype(np.int32): + cv2.circle( + image, point, radius=0, color=(0, 255, 0), thickness=5 + ) +``` From f9078f00821ed79d3acb5f7cf1f77591dcfa0e84 Mon Sep 17 00:00:00 2001 From: Eugene Liu Date: Thu, 31 Oct 2024 18:13:58 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=E2=9C=A8Add=20detailed=20documentation=20f?= =?UTF-8?q?or=20the=20detection=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../python/descriptions/detection_model.md | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/source/python/descriptions/detection_model.md b/docs/source/python/descriptions/detection_model.md index 643f11d0..ce51cab4 100644 --- a/docs/source/python/descriptions/detection_model.md +++ b/docs/source/python/descriptions/detection_model.md @@ -1 +1,43 @@ -# Detection Model \ No newline at end of file +# Detection Model + +## Description + +Detection model aims to detect objects in an image. The model outputs a list of detected objects, each containing a bounding box, score and class label. + +## OpenVINO Model Specifications + +### Inputs + +A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. + + +### Outputs + +Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes: + +- `score` (float) - Confidence score of the object. +- `id` (int) - Class label of the object. +- `str_label` (str) - String label of the object. +- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. +- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. +- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. +- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. 
+ +## Example + +```python +import cv2 +from model_api.models import SSD + +# Load the model +model = SSD.create_model("model.xml") + +# Forward pass +predictions = model(image) + +# Iterate over the segmented objects +for pred_obj in predictions.objects: + pred_score = pred_obj.score + label_id = pred_obj.id + bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] +``` \ No newline at end of file From 771f5f1debfea53dad3bbfb9629105e7c9d473f8 Mon Sep 17 00:00:00 2001 From: Eugene Liu Date: Thu, 31 Oct 2024 18:19:06 +0000 Subject: [PATCH 3/5] Run pre-commit and remove unnecessary blank lines in documentation files --- docs/source/index.md | 1 - docs/source/python/descriptions/detection_model.md | 3 +-- docs/source/python/descriptions/index.md | 2 +- docs/source/python/descriptions/instance_segmentation.md | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/index.md b/docs/source/index.md index 29dc003c..c54c9e69 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -24,7 +24,6 @@ [TODO] ::: - :::: ## Python API Reference diff --git a/docs/source/python/descriptions/detection_model.md b/docs/source/python/descriptions/detection_model.md index ce51cab4..7544dc93 100644 --- a/docs/source/python/descriptions/detection_model.md +++ b/docs/source/python/descriptions/detection_model.md @@ -10,7 +10,6 @@ Detection model aims to detect objects in an image. The model outputs a list of A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 
- ### Outputs Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes: @@ -40,4 +39,4 @@ for pred_obj in predictions.objects: pred_score = pred_obj.score label_id = pred_obj.id bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] -``` \ No newline at end of file +``` diff --git a/docs/source/python/descriptions/index.md b/docs/source/python/descriptions/index.md index fe4286eb..1916928f 100644 --- a/docs/source/python/descriptions/index.md +++ b/docs/source/python/descriptions/index.md @@ -52,4 +52,4 @@ [todo] ::: -:::: \ No newline at end of file +:::: diff --git a/docs/source/python/descriptions/instance_segmentation.md b/docs/source/python/descriptions/instance_segmentation.md index 8508f46c..6257558d 100644 --- a/docs/source/python/descriptions/instance_segmentation.md +++ b/docs/source/python/descriptions/instance_segmentation.md @@ -23,7 +23,6 @@ Instance segmentation model outputs a list of segmented objects (i.e `list[Segme - `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. - `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. 
- ## Example ```python From 2b79938e2a905e8dd8e5f0d933e26527854d8e5d Mon Sep 17 00:00:00 2001 From: Eugene Liu Date: Thu, 31 Oct 2024 18:36:00 +0000 Subject: [PATCH 4/5] =?UTF-8?q?=E2=9C=A8Refactor=20documentation=20structu?= =?UTF-8?q?re=20by=20consolidating=20model=20descriptions=20and=20removing?= =?UTF-8?q?=20obsolete=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/index.md | 26 --------- .../python/descriptions/detection_model.md | 42 -------------- docs/source/python/descriptions/index.md | 55 ------------------- .../descriptions/instance_segmentation.md | 44 --------------- .../python/descriptions/keypoint_detection.md | 51 ----------------- docs/source/python/models/detection_model.md | 42 ++++++++++++++ .../python/models/instance_segmentation.md | 43 +++++++++++++++ .../python/models/keypoint_detection.md | 50 +++++++++++++++++ 8 files changed, 135 insertions(+), 218 deletions(-) delete mode 100644 docs/source/python/descriptions/detection_model.md delete mode 100644 docs/source/python/descriptions/index.md delete mode 100644 docs/source/python/descriptions/instance_segmentation.md delete mode 100644 docs/source/python/descriptions/keypoint_detection.md diff --git a/docs/source/index.md b/docs/source/index.md index c54c9e69..98c0965f 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,31 +1,5 @@ # InferenceSDK Documentation -## Model Description - -::::{grid} 1 2 2 3 -:margin: 1 1 0 0 -:gutter: 1 - -:::{grid-item-card} Instance Segmentation -:link: ./python/descriptions/instance_segmentation -:link-type: doc -[TODO] -::: - -:::{grid-item-card} Detection -:link: ./python/descriptions/detection_model -:link-type: doc -[TODO] -::: - -:::{grid-item-card} Keypoint Detection -:link: ./python/descriptions/keypoint_detection -:link-type: doc -[TODO] -::: - -:::: - ## Python API Reference ::::{grid} 1 2 2 3 diff --git a/docs/source/python/descriptions/detection_model.md 
b/docs/source/python/descriptions/detection_model.md deleted file mode 100644 index 7544dc93..00000000 --- a/docs/source/python/descriptions/detection_model.md +++ /dev/null @@ -1,42 +0,0 @@ -# Detection Model - -## Description - -Detection model aims to detect objects in an image. The model outputs a list of detected objects, each containing a bounding box, score and class label. - -## OpenVINO Model Specifications - -### Inputs - -A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. - -### Outputs - -Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes: - -- `score` (float) - Confidence score of the object. -- `id` (int) - Class label of the object. -- `str_label` (str) - String label of the object. -- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. -- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. -- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. -- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. 
- -## Example - -```python -import cv2 -from model_api.models import SSD - -# Load the model -model = SSD.create_model("model.xml") - -# Forward pass -predictions = model(image) - -# Iterate over the segmented objects -for pred_obj in predictions.objects: - pred_score = pred_obj.score - label_id = pred_obj.id - bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] -``` diff --git a/docs/source/python/descriptions/index.md b/docs/source/python/descriptions/index.md deleted file mode 100644 index 1916928f..00000000 --- a/docs/source/python/descriptions/index.md +++ /dev/null @@ -1,55 +0,0 @@ -# Model Descriptions - -::::{grid} 1 2 2 3 -:margin: 1 1 0 0 -:gutter: 1 - -:::{grid-item-card} Detection Model -:link: ./detection_model -:link-type: doc -[todo] -::: - -:::{grid-item-card} Anomaly -:link: ./anomaly -:link-type: doc -[todo] -::: - -:::{grid-item-card} Keypoint Detection -:link: ./keypoint_detection -:link-type: doc -[todo] -::: - -:::{grid-item-card} Visual Prompting -:link: ./visual_prompting -:link-type: doc -[todo] -::: - -:::{grid-item-card} Classification -:link: ./classification -:link-type: doc -[todo] -::: - -:::{grid-item-card} Segmentation -:link: ./segmentation -:link-type: doc -[todo] -::: - -:::{grid-item-card} Instance Segmentation -:link: ./instance_segmentation -:link-type: doc -[todo] -::: - -:::{grid-item-card} Action Classification -:link: ./action_classification -:link-type: doc -[todo] -::: - -:::: diff --git a/docs/source/python/descriptions/instance_segmentation.md b/docs/source/python/descriptions/instance_segmentation.md deleted file mode 100644 index 6257558d..00000000 --- a/docs/source/python/descriptions/instance_segmentation.md +++ /dev/null @@ -1,44 +0,0 @@ -# Instance Segmentation - -## Description - -Instance segmentation model aims to detect and segment objects in an image. It is an extension of object detection, where each object is segmented into a separate mask. 
The model outputs a list of segmented objects, each containing a mask, bounding box, score and class label. - -## OpenVINO Model Specifications - -### Inputs - -A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. - -### Outputs - -Instance segmentation model outputs a list of segmented objects (i.e `list[SegmentedObject]`)wrapped in `InstanceSegmentationResult.segmentedObjects`, each containing the following attributes: - -- `mask` (numpy.ndarray) - A binary mask of the object. -- `score` (float) - Confidence score of the object. -- `id` (int) - Class label of the object. -- `str_label` (str) - String label of the object. -- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. -- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. -- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. -- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. - -## Example - -```python -import cv2 -from model_api.models import MaskRCNNModel - -# Load the model -model = MaskRCNNModel.create_model("model.xml") - -# Forward pass -predictions = model(image) - -# Iterate over the segmented objects -for pred_obj in predictions.segmentedObjects: - pred_mask = pred_obj.mask - pred_score = pred_obj.score - label_id = pred_obj.id - bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] -``` diff --git a/docs/source/python/descriptions/keypoint_detection.md b/docs/source/python/descriptions/keypoint_detection.md deleted file mode 100644 index c37cba07..00000000 --- a/docs/source/python/descriptions/keypoint_detection.md +++ /dev/null @@ -1,51 +0,0 @@ -# Keypoint Detection - -## Description - -Keypoint detection model aims to detect a set of pre-defined keypoints on a cropped object. -If a crop is not tight enough, quality of keypoints degrades. 
Having this model and an -object detector, one can organize keypoint detection for all objects of interest presented on an image (top-down approach). - -## Models - -Top-down keypoint detection pipeline uses detections that come from any appropriate detector, -and a keypoints regression model acting on crops. - -### Parameters - -The following parameters can be provided via python API or RT Info embedded into OV model: - -- `labels`(`list(str)`) : a list of keypoints names. - -## OpenVINO Model Specifications - -### Inputs - -A single `NCHW` tensor representing a batch of images. - -### Outputs - -Two vectors in Simple Coordinate Classification Perspective ([SimCC](https://arxiv.org/abs/2107.03332)) format: - -- `pred_x` (B, N, D1) - `x` coordinate representation, where `N` is the number of keypoints. -- `pred_y` (B, N, D2) - `y` coordinate representation, where `N` is the number of keypoints. - -## Example - -```python -import cv2 -from model_api.models import TopDownKeypointDetectionPipeline, Detection, KeypointDetectionModel - -model = KeypointDetectionModel.create_model("kp_model.xml") -# a list of detections in (x_min, y_min, x_max, y_max, score, class_id) format -detections = [Detection(0, 0, 100, 100, 1.0, 0)] -top_down_pipeline = TopDownKeypointDetectionPipeline(model) -predictions = top_down_detector.predict(image, detections) - -# iterating over a list of DetectedKeypoints. Each of the items corresponds to a detection -for obj_keypoints in predictions: - for point in obj_keypoints.keypoints.astype(np.int32): - cv2.circle( - image, point, radius=0, color=(0, 255, 0), thickness=5 - ) -``` diff --git a/docs/source/python/models/detection_model.md b/docs/source/python/models/detection_model.md index 4325f7d0..8e67531f 100644 --- a/docs/source/python/models/detection_model.md +++ b/docs/source/python/models/detection_model.md @@ -1,5 +1,47 @@ # Detection Model +## Description + +Detection model aims to detect objects in an image. 
The model outputs a list of detected objects, each containing a bounding box, score and class label. + +## OpenVINO Model Specifications + +### Inputs + +A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. + +### Outputs + +Detection model outputs a list of detection objects (i.e. `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes: + +- `score` (float) - Confidence score of the object. +- `id` (int) - Class label of the object. +- `str_label` (str) - String label of the object. +- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. +- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. +- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. +- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. + +## Example + +```python +import cv2 +from model_api.models import SSD + +# Load the model +model = SSD.create_model("model.xml") + +# Forward pass +predictions = model(image) + +# Iterate over the detected objects +for pred_obj in predictions.objects: + pred_score = pred_obj.score + label_id = pred_obj.id + bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] +``` + + ```{eval-rst} .. automodule:: model_api.models.detection_model :members: diff --git a/docs/source/python/models/instance_segmentation.md b/docs/source/python/models/instance_segmentation.md index 72f16d25..47d1f83d 100644 --- a/docs/source/python/models/instance_segmentation.md +++ b/docs/source/python/models/instance_segmentation.md @@ -1,5 +1,48 @@ # Instance Segmentation +## Description + +Instance segmentation model aims to detect and segment objects in an image. It is an extension of object detection, where each object is segmented into a separate mask. The model outputs a list of segmented objects, each containing a mask, bounding box, score and class label. 
+ +## OpenVINO Model Specifications + +### Inputs + +A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. + +### Outputs + +Instance segmentation model outputs a list of segmented objects (i.e. `list[SegmentedObject]`) wrapped in `InstanceSegmentationResult.segmentedObjects`, each containing the following attributes: + +- `mask` (numpy.ndarray) - A binary mask of the object. +- `score` (float) - Confidence score of the object. +- `id` (int) - Class label of the object. +- `str_label` (str) - String label of the object. +- `xmin` (int) - X-coordinate of the top-left corner of the bounding box. +- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box. +- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box. +- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box. + +## Example + +```python +import cv2 +from model_api.models import MaskRCNNModel + +# Load the model +model = MaskRCNNModel.create_model("model.xml") + +# Forward pass +predictions = model(image) + +# Iterate over the segmented objects +for pred_obj in predictions.segmentedObjects: + pred_mask = pred_obj.mask + pred_score = pred_obj.score + label_id = pred_obj.id + bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] +``` + ```{eval-rst} .. automodule:: model_api.models.instance_segmentation :members: diff --git a/docs/source/python/models/keypoint_detection.md b/docs/source/python/models/keypoint_detection.md index b4e3afaa..279b0356 100644 --- a/docs/source/python/models/keypoint_detection.md +++ b/docs/source/python/models/keypoint_detection.md @@ -1,5 +1,55 @@ # Keypoint Detection +## Description + +Keypoint detection model aims to detect a set of pre-defined keypoints on a cropped object. +If a crop is not tight enough, quality of keypoints degrades. 
Having this model and an +object detector, one can organize keypoint detection for all objects of interest present in an image (top-down approach). + +## Models + +Top-down keypoint detection pipeline uses detections that come from any appropriate detector, +and a keypoints regression model acting on crops. + +### Parameters + +The following parameters can be provided via Python API or RT Info embedded into OV model: + +- `labels` (`list[str]`): a list of keypoint names. + +## OpenVINO Model Specifications + +### Inputs + +A single `NCHW` tensor representing a batch of images. + +### Outputs + +Two vectors in Simple Coordinate Classification Perspective ([SimCC](https://arxiv.org/abs/2107.03332)) format: + +- `pred_x` (B, N, D1) - `x` coordinate representation, where `N` is the number of keypoints. +- `pred_y` (B, N, D2) - `y` coordinate representation, where `N` is the number of keypoints. + +## Example + +```python +import cv2 +from model_api.models import TopDownKeypointDetectionPipeline, Detection, KeypointDetectionModel + +model = KeypointDetectionModel.create_model("kp_model.xml") +# a list of detections in (x_min, y_min, x_max, y_max, score, class_id) format +detections = [Detection(0, 0, 100, 100, 1.0, 0)] +top_down_pipeline = TopDownKeypointDetectionPipeline(model) +predictions = top_down_pipeline.predict(image, detections) + +# iterating over a list of DetectedKeypoints. Each of the items corresponds to a detection +for obj_keypoints in predictions: + for point in obj_keypoints.keypoints.astype(np.int32): + cv2.circle( + image, point, radius=0, color=(0, 255, 0), thickness=5 + ) +``` + ```{eval-rst} .. 
automodule:: model_api.models.keypoint_detection :members: From 83c64cb503800dcec269b75b046279aef141e4e2 Mon Sep 17 00:00:00 2001 From: Eugene Liu Date: Mon, 4 Nov 2024 08:34:58 +0000 Subject: [PATCH 5/5] Reformat --- docs/source/python/models/detection_model.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/python/models/detection_model.md b/docs/source/python/models/detection_model.md index 8e67531f..fce04dca 100644 --- a/docs/source/python/models/detection_model.md +++ b/docs/source/python/models/detection_model.md @@ -41,7 +41,6 @@ for pred_obj in predictions.objects: bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax] ``` - ```{eval-rst} .. automodule:: model_api.models.detection_model :members: