Skip to content

Commit 0f68153

Browse files
authored
Update the bounding box utility to be more readable (keras-team#51)
* Update the bounding box utility to be more readable 1. Added a docstring header with more details about the formats we usually use for bounding boxes, and the math for converting between them. 2. Slightly changed the implementation of the conversion functions to be more readable. The current index-slice-based approach is very hard to understand. 3. Added unit tests for the bbox util. * Fix the format issue. * Fix more format issues
1 parent faf416b commit 0f68153

File tree

2 files changed

+134
-14
lines changed

2 files changed

+134
-14
lines changed

keras_cv/utils/bbox.py

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,86 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Shared utility functions for working with bounding boxes."""
15+
"""Shared utility functions for working with bounding boxes.
16+
17+
Usually bounding boxes are a 2D Tensor with shape [batch, 4]. The second dimension
18+
will contain 4 numbers based on 2 different formats:
19+
20+
1. LEFT, TOP, RIGHT, BOTTOM, where LEFT, TOP represent the top-left corner
21+
coordinates, and RIGHT, BOTTOM represent the bottom-right corner coordinates.
22+
2. X, Y, WIDTH, HEIGHT, where X and Y are the coordinates for the center of the box.
23+
24+
Math wise:
25+
LEFT = X - WIDTH / 2
26+
TOP = Y - HEIGHT / 2
27+
RIGHT = X + WIDTH / 2
28+
BOTTOM = Y + HEIGHT / 2
29+
30+
X = (LEFT + RIGHT) / 2
31+
Y = (TOP + BOTTOM) / 2
32+
WIDTH = RIGHT - LEFT
33+
HEIGHT = BOTTOM - TOP
34+
35+
Note that these two formats are both commonly used. The corners format is mostly used
36+
for IOU computation, whereas XYWH is convenient for generating bounding boxes with
37+
different centers and width/height ratios.
38+
"""
1639

1740
import tensorflow as tf
1841

19-
# These are the dimensions used in Tensors to represent each corresponding side.
42+
# These are the indexes used in Tensors to represent each corresponding side.
2043
LEFT, TOP, RIGHT, BOTTOM = 0, 1, 2, 3
2144

22-
# These are the dimensions that you can use for bboxes in corners format.
45+
# These are the indexes that you can use for bounding box in XYWH format.
2346
X, Y, WIDTH, HEIGHT = 0, 1, 2, 3
2447

2548
# Regardless of format these constants are consistent.
26-
# Class is held in the 4th index
49+
# Class is held at index 4 (the 5th element).
2750
CLASS = 4
28-
# Confidence exists only on y_pred, and is in the 5th index.
51+
# Confidence exists only on y_pred, and is at index 5 (the 6th element).
2952
CONFIDENCE = 5
3053

3154

32-
def convert_corners_to_xywh(bboxes):
33-
"""Converts bboxes in corners format to xywh format."""
55+
def corners_to_xywh(bboxes):
56+
"""Converts bboxes in corners format to XYWH format.
57+
58+
Args:
59+
bboxes: a Tensor of rank 2 or higher, with shape [..., 4]. Any values after the first 4 in the last dimension are passed through unchanged.
60+
61+
Returns:
62+
converted bboxes with same shape, but in XYWH format.
63+
"""
64+
left, top, right, bottom, rest = tf.split(bboxes, [1, 1, 1, 1, -1], axis=-1)
3465
return tf.concat(
3566
[
36-
(bboxes[..., :2] + bboxes[..., 2:4]) / 2.0,
37-
bboxes[..., 2:4] - bboxes[..., :2],
38-
bboxes[..., 4:],
67+
# Splitting and concatenating on the last axis keeps this working for inputs of higher rank.
68+
(left + right) / 2.0, # X
69+
(top + bottom) / 2.0, # Y
70+
right - left, # WIDTH
71+
bottom - top, # HEIGHT
72+
rest, # Any extra values after BOTTOM are passed through unchanged.
3973
],
4074
axis=-1,
4175
)
4276

4377

4478
def xywh_to_corners(bboxes):
45-
"""Converts bboxes in xywh format to corners format."""
79+
"""Converts bboxes in XYWH format to corners format.
80+
81+
Args:
82+
bboxes: a Tensor of rank 2 or higher, with shape [..., 4]. Any values after the first 4 in the last dimension are passed through unchanged.
83+
84+
Returns:
85+
converted bboxes with same shape, but in corners format.
86+
"""
87+
x, y, width, height, rest = tf.split(bboxes, [1, 1, 1, 1, -1], axis=-1)
4688
return tf.concat(
4789
[
48-
bboxes[..., :2] - bboxes[..., 2:4] / 2.0,
49-
bboxes[..., :2] + bboxes[..., 2:4] / 2.0,
50-
bboxes[..., 4:],
90+
x - width / 2.0,
91+
y - height / 2.0,
92+
x + width / 2.0,
93+
y + height / 2.0,
94+
rest, # Any extra values after HEIGHT are passed through unchanged.
5195
],
5296
axis=-1,
5397
)

keras_cv/utils/bbox_test.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright 2022 The KerasCV Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import tensorflow as tf
16+
17+
from keras_cv.utils import bbox
18+
19+
20+
class BBOXTestCase(tf.test.TestCase):
21+
def setUp(self):
22+
super().setUp()
23+
self.corner_bbox = tf.constant(
24+
[[10, 10, 110, 110], [20, 20, 120, 120]], dtype=tf.float32
25+
)
26+
self.xywh_bbox = tf.constant(
27+
[[60, 60, 100, 100], [70, 70, 100, 100]], dtype=tf.float32
28+
)
29+
30+
def test_corner_to_xywh(self):
31+
self.assertAllClose(bbox.corners_to_xywh(self.corner_bbox), self.xywh_bbox)
32+
33+
# Make sure it also accepts inputs with rank higher than 2.
34+
corner_bbox_3d = tf.expand_dims(self.corner_bbox, 0)
35+
xywh_bbox_3d = tf.expand_dims(self.xywh_bbox, 0)
36+
self.assertAllClose(bbox.corners_to_xywh(corner_bbox_3d), xywh_bbox_3d)
37+
38+
# Make sure it also accepts extra values after the last index.
39+
padded_corner_bbox = tf.pad(
40+
self.corner_bbox, [[0, 0], [0, 2]]
41+
) # Right-pad with 2 more values
42+
padded_xywh_bbox = tf.pad(self.xywh_bbox, [[0, 0], [0, 2]])
43+
self.assertAllClose(
44+
bbox.corners_to_xywh(padded_corner_bbox), padded_xywh_bbox
45+
)
46+
47+
# Same for higher rank
48+
padded_corner_bbox_3d = tf.expand_dims(padded_corner_bbox, 0)
49+
padded_xywh_bbox_3d = tf.expand_dims(padded_xywh_bbox, 0)
50+
self.assertAllClose(
51+
bbox.corners_to_xywh(padded_corner_bbox_3d), padded_xywh_bbox_3d
52+
)
53+
54+
def test_xywh_to_corner(self):
55+
self.assertAllClose(bbox.xywh_to_corners(self.xywh_bbox), self.corner_bbox)
56+
57+
# Make sure it also accepts inputs with rank higher than 2.
58+
corner_bbox_3d = tf.expand_dims(self.corner_bbox, 0)
59+
xywh_bbox_3d = tf.expand_dims(self.xywh_bbox, 0)
60+
self.assertAllClose(bbox.xywh_to_corners(xywh_bbox_3d), corner_bbox_3d)
61+
62+
# Make sure it also accepts extra values after the last index.
63+
padded_corner_bbox = tf.pad(
64+
self.corner_bbox, [[0, 0], [0, 2]]
65+
) # Right-pad with 2 more values
66+
padded_xywh_bbox = tf.pad(self.xywh_bbox, [[0, 0], [0, 2]])
67+
self.assertAllClose(
68+
bbox.xywh_to_corners(padded_xywh_bbox), padded_corner_bbox
69+
)
70+
71+
# Same for higher rank
72+
padded_corner_bbox_3d = tf.expand_dims(padded_corner_bbox, 0)
73+
padded_xywh_bbox_3d = tf.expand_dims(padded_xywh_bbox, 0)
74+
self.assertAllClose(
75+
bbox.xywh_to_corners(padded_xywh_bbox_3d), padded_corner_bbox_3d
76+
)

0 commit comments

Comments
 (0)