"""
Copyright (c) 2018-2022 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import numpy as np

from ..adapters import Adapter
from ..config import StringField, NumberField, BoolField, ListField
from ..representation import DetectionPrediction


class PalmDetectionAdapter(Adapter):
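    """
    Decodes the raw score and box tensors of an SSD-style palm detection model
    (MediaPipe-like anchor layout) into DetectionPrediction objects.
    """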
    __provider__ = 'palm_detection'

    @classmethod
    def parameters(cls):
        params = super().parameters()
        params.update({
            'scores_out': StringField(description='scores output'),
            'boxes_out': StringField(description='boxes output'),
            'num_anchor_layers': NumberField(
                description="Number of anchor layers", value_type=int, min_value=0, default=4, optional=True),
            'strides': ListField(value_type=int, optional=True, default=[8, 16, 16, 16],
                                 description='strides of input multi-level feature maps'),
            'min_scale': NumberField(description="Minimal scale", default=0.1484375, optional=True),
            'max_scale': NumberField(description="Maximal scale", default=0.75, optional=True),
            'input_size_width': NumberField(
                description="Width of a model input image.", value_type=int, min_value=128, default=128,
                optional=True),
            'input_size_height': NumberField(
                description="Height of a model input image.", value_type=int, min_value=128, default=128,
                optional=True),
            'reduce_boxes_in_lowest_layer': BoolField(
                description="Reduce size of anchors in lowest layer", default=False, optional=True),
            'aspect_ratios': ListField(value_type=int, optional=True, default=[1],
                                       description='Aspect ratios for each level of input multi-level feature maps'),
            'interpolated_scale_aspect_ratio': NumberField(
                description="Aspect ratio for interpolated scale", default=1, optional=True),
            'fixed_anchor_size': BoolField(
                description="Produces anchors with fixed size", default=True, optional=True),
            'sigmoid_score': BoolField(description="Score output is sigmoid", default=True, optional=True),
            'score_clipping_thresh': NumberField(
                description="Score clipping threshold", default=100, optional=True),
            'reverse_output_order': BoolField(
                description="(x, y) coordinates order instead of (y, x)", default=True, optional=True),
            'keypoint_coord_offset': NumberField(
                description="Offset of keypoint coordinates", value_type=int, min_value=4, default=4, optional=True),
            'num_keypoints': NumberField(
                description="Number of keypoints", value_type=int, min_value=0, default=7, optional=True),
            'num_values_per_keypoint': NumberField(
                description="Number of coordinates per keypoint",
                value_type=int, min_value=0, default=2, optional=True),
            'scales': ListField(
                description='Detection box scales for x, y, w, h.', value_type=int, optional=True,
                default=[128, 128, 128, 128]),
            'min_score_thresh': NumberField(description="Minimal score threshold", default=0.5, optional=True),
            'apply_exp_on_box_size': BoolField(
                description="Box size is the argument of an exponent", default=False, optional=True),
            'num_classes': NumberField(
                description="Number of classes.", value_type=int, min_value=0, default=1, optional=True),
        })
        return params

    def configure(self):
        self.scores_out = self.get_value_from_config('scores_out')
        self.boxes_out = self.get_value_from_config('boxes_out')
        self.outputs_verified = False

        self.num_anchor_layers = self.get_value_from_config('num_anchor_layers')
        self.min_scale = self.get_value_from_config('min_scale')
        self.max_scale = self.get_value_from_config('max_scale')
        self.input_size_height = self.get_value_from_config('input_size_height')
        self.input_size_width = self.get_value_from_config('input_size_width')
        self.strides = self.get_value_from_config('strides')
        self.reduce_boxes_in_lowest_layer = self.get_value_from_config('reduce_boxes_in_lowest_layer')
        self.interpolated_scale_aspect_ratio = self.get_value_from_config('interpolated_scale_aspect_ratio')
        self.fixed_anchor_size = self.get_value_from_config('fixed_anchor_size')
        self.aspect_ratios = self.get_value_from_config('aspect_ratios')
        self.anchor_offset_x = 0.5
        self.anchor_offset_y = 0.5
        self.feature_map_height = []
        self.feature_map_width = []
        self.anchors = self.generate_anchors()

        self.sigmoid_score = self.get_value_from_config('sigmoid_score')
        self.score_clipping_thresh = self.get_value_from_config('score_clipping_thresh')
        self.reverse_output_order = self.get_value_from_config('reverse_output_order')
        self.keypoint_coord_offset = self.get_value_from_config('keypoint_coord_offset')
        self.num_keypoints = self.get_value_from_config('num_keypoints')
        self.num_values_per_keypoint = self.get_value_from_config('num_values_per_keypoint')
        scales = self.get_value_from_config('scales')
        assert len(scales) == 4, 'scales must contain exactly 4 values: x, y, w, h'
        self.x_scale, self.y_scale, self.w_scale, self.h_scale = scales
        self.min_score_thresh = self.get_value_from_config('min_score_thresh')
        self.apply_exp_on_box_size = self.get_value_from_config('apply_exp_on_box_size')
        self.num_classes = self.get_value_from_config('num_classes')

    def select_output_blob(self, outputs):
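        """Resolve the configured output names against the names actually present in the model outputs."""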
        self.scores_out = self.check_output_name(self.scores_out, outputs)
        self.boxes_out = self.check_output_name(self.boxes_out, outputs)
        self.outputs_verified = True

    def process(self, raw, identifiers, frame_meta):
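        """Decode raw network outputs into per-image DetectionPrediction results."""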
        result = []
        raw_output = self._extract_predictions(raw, frame_meta)
        if not self.outputs_verified:
            self.select_output_blob(raw_output)

        for identifier, raw_scores, raw_boxes in zip(identifiers, raw_output[self.scores_out],
                                                     raw_output[self.boxes_out]):
            num_boxes, _ = raw_boxes.shape
            boxes = self.decode_boxes(raw_boxes)
            detection_scores = np.zeros(num_boxes)
            detection_classes = np.zeros(num_boxes)

            for i in range(num_boxes):
                class_id = -1
                max_score = -np.inf
                for score_idx in range(self.num_classes):
                    score = raw_scores[i, score_idx]
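                    # Raw scores are logits; optionally clip them before the sigmoid
                    # to avoid overflow in np.exp for large magnitudes.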
                    if self.sigmoid_score:
                        if self.score_clipping_thresh:
                            score = np.clip(score, -self.score_clipping_thresh, self.score_clipping_thresh)
                        score = 1 / (1 + np.exp(-score))
                    if max_score < score:
                        max_score = score
                        class_id = score_idx
                detection_classes[i] = class_id
                detection_scores[i] = max_score
            # Keep only detections above the score threshold.
            cond = detection_scores >= self.min_score_thresh
            boxes = np.array(boxes)[cond]
            detection_classes = detection_classes[cond]
            detection_scores = detection_scores[cond]

            # Drop degenerate boxes with negative width or height.
            cond = ((boxes[:, 2] - boxes[:, 0]) >= 0) & ((boxes[:, 3] - boxes[:, 1]) >= 0)

            boxes = boxes[cond, :]
            detection_classes = detection_classes[cond]
            detection_scores = detection_scores[cond]

            y_mins, x_mins, y_maxs, x_maxs = boxes.T[:4, :]

            result.append(DetectionPrediction(identifier, detection_classes, detection_scores,
                                              x_mins, y_mins, x_maxs, y_maxs))

        return result

    @staticmethod
    def calculate_scale(min_scale, max_scale, stride_index, num_strides):
        """Linearly interpolate an anchor scale for the given stride index."""
        if num_strides == 1:
            return (min_scale + max_scale) * 0.5
        return min_scale + (max_scale - min_scale) * stride_index / (num_strides - 1)

    def generate_anchors(self):
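        """
        Build the SSD-style anchor grid: for every feature map cell of every
        stride level, one anchor per (aspect ratio, scale) pair, each stored as
        [x_center, y_center, width, height] normalized to [0, 1].
        """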
        anchors = []
        layer_id = 0
        while layer_id < self.num_anchor_layers:
            anchor_height = []
            anchor_width = []
            aspect_ratios = []
            scales = []

            # Collect aspect ratios and scales for all consecutive layers sharing the same stride.
            last_same_stride_layer = layer_id
            while (last_same_stride_layer < len(self.strides)
                   and self.strides[last_same_stride_layer] == self.strides[layer_id]):
                scale = self.calculate_scale(self.min_scale, self.max_scale, last_same_stride_layer,
                                             len(self.strides))
                if last_same_stride_layer == 0 and self.reduce_boxes_in_lowest_layer:
                    ar_and_s = zip([1, 2, 0.5], [0.1, scale, scale])
                else:
                    ar_and_s = zip(self.aspect_ratios, [scale] * len(self.aspect_ratios))
                for aspect_ratio, scale_ in ar_and_s:
                    aspect_ratios.append(aspect_ratio)
                    scales.append(scale_)

                if self.interpolated_scale_aspect_ratio > 0:
                    scale_next = 1 if last_same_stride_layer == len(self.strides) - 1 else self.calculate_scale(
                        self.min_scale, self.max_scale, last_same_stride_layer + 1, len(self.strides))
                    scales.append(np.sqrt(scale * scale_next))
                    aspect_ratios.append(self.interpolated_scale_aspect_ratio)
                last_same_stride_layer += 1

            for aspect_ratio, scale in zip(aspect_ratios, scales):
                anchor_height.append(scale / np.sqrt(aspect_ratio))
                anchor_width.append(scale * np.sqrt(aspect_ratio))

            feature_map_height = self.feature_map_height[layer_id] if self.feature_map_height else int(
                np.ceil(self.input_size_height / self.strides[layer_id]))
            feature_map_width = self.feature_map_width[layer_id] if self.feature_map_width else int(
                np.ceil(self.input_size_width / self.strides[layer_id]))

            for y in range(feature_map_height):
                for x in range(feature_map_width):
                    for anchor_w, anchor_h in zip(anchor_width, anchor_height):
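                        # With fixed_anchor_size the decoder treats every anchor as a unit
                        # box and relies on the regressed width/height instead.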
                        anchor = [(x + self.anchor_offset_x) / feature_map_width,
                                  (y + self.anchor_offset_y) / feature_map_height,
                                  1 if self.fixed_anchor_size else anchor_w,
                                  1 if self.fixed_anchor_size else anchor_h]

                        anchors.append(anchor)

            layer_id = last_same_stride_layer

        return np.array(anchors)

    def decode_boxes(self, raw_boxes):
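        """
        Convert raw box regressions into [y_min, x_min, y_max, x_max] followed
        by decoded keypoint coordinates, using the precomputed anchors.
        """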
        boxes = []
        num_boxes, _ = raw_boxes.shape

        for i in range(num_boxes):
            anchor = self.anchors[i, :]
            y_center = raw_boxes[i, 1] if self.reverse_output_order else raw_boxes[i, 0]
            x_center = raw_boxes[i, 0] if self.reverse_output_order else raw_boxes[i, 1]
            h = raw_boxes[i, 3] if self.reverse_output_order else raw_boxes[i, 2]
            w = raw_boxes[i, 2] if self.reverse_output_order else raw_boxes[i, 3]

            x_center = x_center / self.x_scale * anchor[2] + anchor[0]
            y_center = y_center / self.y_scale * anchor[3] + anchor[1]
            h = np.exp(h / self.h_scale) * anchor[3] if self.apply_exp_on_box_size else h / self.h_scale * anchor[3]
            w = np.exp(w / self.w_scale) * anchor[2] if self.apply_exp_on_box_size else w / self.w_scale * anchor[2]

            decoded = [y_center - h / 2, x_center - w / 2, y_center + h / 2, x_center + w / 2]

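            # Keypoints follow the 4 box values in the raw tensor, num_values_per_keypoint
            # entries each, and are decoded relative to the anchor center as well.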
            for k in range(self.num_keypoints):
                offset = self.keypoint_coord_offset + k * self.num_values_per_keypoint
                keypoint_y = raw_boxes[i, offset + 1] if self.reverse_output_order else raw_boxes[i, offset]
                keypoint_x = raw_boxes[i, offset] if self.reverse_output_order else raw_boxes[i, offset + 1]

                decoded.append(keypoint_x / self.x_scale * anchor[2] + anchor[0])
                decoded.append(keypoint_y / self.y_scale * anchor[3] + anchor[1])

            boxes.append(decoded)

        return boxes