Commit 37c5f8a

add unit tests
1 parent 2886439 commit 37c5f8a

1 file changed: +167 −0 lines changed

@@ -0,0 +1,167 @@
import numpy as np

from valor_lite.object_detection.computation import (
    calculate_ranking_boundaries,
    compute_counts,
)


def test_computation_calculate_ranking_boundaries_label_mismatch_edge_case():
    """
    In v0.37.2 and earlier 'calculate_ranking_boundaries' did not factor in label matching
    when computing IOU boundaries. This led to issues in 'compute_counts' where TP candidates
    were eliminated by IOU masking when an FP candidate from a label mismatch performed better
    in both IOU and score. A reference sketch of the inferred boundary rule follows this test.

    Note that input pairs have shape (N_rows, 7):
        0: Datum ID
        1: Groundtruth ID
        2: Prediction ID
        3: Groundtruth Label ID
        4: Prediction Label ID
        5: IOU
        6: Prediction Score
    """

    ranked_pairs = np.array(
        [
            [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0],  # skip b/c mismatched label
            [0.0, 0.0, 1.0, 0.0, 0.0, 0.1, 0.9],  # TP for IOU threshold <= 0.1
            [0.0, 0.0, 2.0, 0.0, 0.0, 0.9, 0.8],  # TP for 0.1 < IOU threshold <= 0.9
            [0.0, 0.0, 3.0, 0.0, 0.0, 0.5, 0.1],  # this row is never reached
        ]
    )

    # ranked pairs are expected to be sorted by descending score with descending IOU as tie-breaker
    iou_boundary = calculate_ranking_boundaries(ranked_pairs)
    assert (
        iou_boundary
        == np.array(
            [
                2.0,  # ineligible rows are marked with 2.0
                0.0,  # lower IOU threshold boundary for first TP candidate
                0.1,  # lower IOU threshold boundary for second TP candidate
                2.0,  # ineligible row
            ]
        )
    ).all()
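

# A reference sketch (an inference from the expectations above, not the
# library's actual implementation) of the boundary rule being tested: walking
# the ranked pairs in order, a label-matched candidate is eligible only if its
# IOU beats the best IOU any higher-ranked candidate achieved for the same
# groundtruth; its lower boundary is that previous best. All other rows are
# marked ineligible with 2.0.
def _reference_ranking_boundaries(pairs: np.ndarray) -> np.ndarray:
    boundaries = np.full(pairs.shape[0], 2.0)
    best_iou = {}  # (datum_id, groundtruth_id) -> best IOU seen so far
    for i, row in enumerate(pairs):
        datum_id, gt_id, _, gt_label, pd_label, iou, _ = row
        if gt_label != pd_label:
            continue  # a mismatched label can never produce a TP
        prev = best_iou.get((datum_id, gt_id), 0.0)
        if iou > prev:
            boundaries[i] = prev  # match once IOU threshold exceeds prev
            best_iou[(datum_id, gt_id)] = iou
        # otherwise a higher-ranked candidate already has a better IOU,
        # so this row is never reached at any threshold
    return boundaries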


def test_computation_compute_counts_ordering_edge_case():
    """
    In v0.37.2 and earlier there was a bug where the last prediction in a bin was
    selected regardless of whether it had the maximum score or precision.

    The PR curve is binned over 101 fixed recall points. To test this we first have
    to ensure that at least two predictions will lie within the same bin. We can do
    this by generating a single datum with a single groundtruth and having at least
    2x the number of predictions as there are bins (see the arithmetic note after
    this docstring). To check the edge case we then test two variations:

    - First prediction is the only TP
    - Second prediction is the only TP

    In both cases we need to confirm that the TP is the prediction that populates the
    resulting precision-recall curve.

    Note that input pairs have shape (N_rows, 8):
        0: Datum ID
        1: Groundtruth ID
        2: Prediction ID
        3: Groundtruth Label ID
        4: Prediction Label ID
        5: IOU
        6: Prediction Score
        7: IOU boundary
    """
    N = 202
    datum_ids = np.zeros(N)
    gt_ids = np.zeros(N)
    pd_ids = np.arange(0, N)
    gt_label_ids = np.zeros(N)
    pd_label_ids = np.zeros(N)
    ious = np.zeros(N)
    scores = np.arange(N - 1, -1, -1) / (N - 1)
    iou_boundary = np.ones(N) * 2.0

    # ==== first prediction is the TP ====
    ious[0] = 1.0
    iou_boundary[0] = 0.0

    ranked_pairs = np.hstack(
        [
            datum_ids.reshape(-1, 1),
            gt_ids.reshape(-1, 1),
            pd_ids.reshape(-1, 1),
            gt_label_ids.reshape(-1, 1),
            pd_label_ids.reshape(-1, 1),
            ious.reshape(-1, 1),
            scores.reshape(-1, 1),
            iou_boundary.reshape(-1, 1),
        ]
    ).astype(np.float64)

    pr_curve = np.zeros((1, 1, 101, 2))  # updated by reference
    _ = compute_counts(
        ranked_pairs=ranked_pairs,
        iou_thresholds=np.array([0.5]),
        score_thresholds=np.array([0.5]),
        number_of_groundtruths_per_label=np.array([N]),
        number_of_labels=1,
        running_counts=np.zeros((1, 1, 2), dtype=np.uint64),
        pr_curve=pr_curve,
    )
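
    # Note: the trailing axis of pr_curve is assumed to hold (precision, score)
    # per recall bin; that layout is inferred from the original comments.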
    # test that pr curve contains the highest precision and score per recall bin
    assert pr_curve.shape == (1, 1, 101, 2)
    assert pr_curve[0, 0, :, 0].tolist() == [1.0] + [0.0] * 100  # precision computed from first row
    assert pr_curve[0, 0, :, 1].tolist() == [float(scores[0])] + [0.0] * 100  # first score

    # ==== second prediction is the TP ====
    ious[1] = 1.0
    iou_boundary[1] = 0.0

    ranked_pairs = np.hstack(
        [
            datum_ids.reshape(-1, 1),
            gt_ids.reshape(-1, 1),
            pd_ids.reshape(-1, 1),
            gt_label_ids.reshape(-1, 1),
            pd_label_ids.reshape(-1, 1),
            ious.reshape(-1, 1),
            scores.reshape(-1, 1),
            iou_boundary.reshape(-1, 1),
        ]
    ).astype(np.float64)

    pr_curve = np.zeros((1, 1, 101, 2))  # updated by reference
    _ = compute_counts(
        ranked_pairs=ranked_pairs,
        iou_thresholds=np.array([0.5]),
        score_thresholds=np.array([0.5]),
        number_of_groundtruths_per_label=np.array([N]),
        number_of_labels=1,
        running_counts=np.zeros((1, 1, 2), dtype=np.uint64),
        pr_curve=pr_curve,
    )

    # test that pr curve contains the highest precision and score per recall bin
    assert pr_curve.shape == (1, 1, 101, 2)
    assert pr_curve[0, 0, :, 0].tolist() == [1.0] + [0.0] * 100  # precision computed from second row
    assert pr_curve[0, 0, :, 1].tolist() == [float(scores[0])] + [0.0] * 100  # first score even though it's not a TP
