Skip to content

Commit 8f4ec9e

Browse files
Handles batches for instance IoU
1 parent c54d711 commit 8f4ec9e

File tree

5 files changed

+112
-46
lines changed

5 files changed

+112
-46
lines changed

cuda/include/metrics.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@
33

44
at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offsets,
55
at::Tensor gt_instances, at::Tensor gt_instance_sizes,
6-
long num_gt_instances);
6+
at::Tensor num_gt_instances, at::Tensor batch);

cuda/src/metrics.cpp

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,33 +2,48 @@
22
#include "compat.h"
33
#include "utils.h"
44

5-
void instance_iou_kernel_wrapper(int nInstance, int nProposal, long* proposals_idx,
6-
long* proposals_offset, long* instance_labels,
7-
long* instance_pointnum, float* proposals_iou);
5+
void instance_iou_kernel_wrapper(long total_gt_instances, long max_gt_instances,
6+
const long* nInstance, int nProposal, const long* proposals_idx,
7+
const long* proposals_offset, const long* instance_labels,
8+
const long* offset_num_gt_instances, const long* batch,
9+
const long* instance_pointnum, float* proposals_iou);
810

911
at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offsets,
1012
at::Tensor gt_instances, at::Tensor gt_instance_sizes,
11-
long num_gt_instances)
13+
at::Tensor num_gt_instances, at::Tensor batch)
1214
{
1315
CHECK_CONTIGUOUS(instance_idx);
1416
CHECK_CONTIGUOUS(instance_offsets);
1517
CHECK_CONTIGUOUS(gt_instances);
1618
CHECK_CONTIGUOUS(gt_instance_sizes);
19+
CHECK_CONTIGUOUS(num_gt_instances);
20+
CHECK_CONTIGUOUS(batch);
1721

1822
CHECK_CUDA(instance_idx);
1923
CHECK_CUDA(instance_offsets);
2024
CHECK_CUDA(gt_instances);
2125
CHECK_CUDA(gt_instance_sizes);
2226

27+
cudaSetDevice(instance_idx.get_device());
2328
long num_proposed_instances = instance_offsets.size(0) - 1;
29+
auto total_gt_instances = (int64_t*)malloc(sizeof(int64_t));
30+
cudaMemcpy(total_gt_instances, num_gt_instances.sum().DATA_PTR<int64_t>(), sizeof(int64_t),
31+
cudaMemcpyDeviceToHost);
32+
auto max_gt_instances = (int64_t*)malloc(sizeof(int64_t));
33+
cudaMemcpy(max_gt_instances, num_gt_instances.max().DATA_PTR<int64_t>(), sizeof(int64_t),
34+
cudaMemcpyDeviceToHost);
35+
2436
at::Tensor output =
25-
torch::zeros({num_proposed_instances, num_gt_instances},
37+
torch::zeros({num_proposed_instances, total_gt_instances[0]},
2638
at::device(gt_instances.device()).dtype(at::ScalarType::Float));
2739

28-
instance_iou_kernel_wrapper(num_gt_instances, num_proposed_instances,
29-
instance_idx.DATA_PTR<long>(), instance_offsets.DATA_PTR<long>(),
30-
gt_instances.DATA_PTR<long>(), gt_instance_sizes.DATA_PTR<long>(),
31-
output.DATA_PTR<float>());
40+
at::Tensor offset_num_gt_instances =
41+
at::cat({at::zeros(1, num_gt_instances.options()), num_gt_instances.cumsum(0)}, 0);
42+
instance_iou_kernel_wrapper(
43+
total_gt_instances[0], max_gt_instances[0], num_gt_instances.DATA_PTR<long>(),
44+
num_proposed_instances, instance_idx.DATA_PTR<long>(), instance_offsets.DATA_PTR<long>(),
45+
gt_instances.DATA_PTR<long>(), offset_num_gt_instances.DATA_PTR<long>(),
46+
batch.DATA_PTR<long>(), gt_instance_sizes.DATA_PTR<long>(), output.DATA_PTR<float>());
3247

3348
return output;
3449
}

cuda/src/metrics_gpu.cu

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,18 +6,25 @@
66

77
#define THREADS 512
88

9-
__global__ void instance_iou_cuda_kernel(int nInstance, int nProposal, long* proposals_idx,
10-
long* proposals_offset, long* instance_labels,
11-
long* instance_pointnum, float* proposals_iou)
9+
__global__ void instance_iou_cuda_kernel(
10+
long total_gt_instances, const long* __restrict__ nInstance, int nProposal,
11+
const long* __restrict__ proposals_idx, const long* __restrict__ proposals_offset,
12+
const long* __restrict__ instance_labels, const long* __restrict__ offset_num_gt_instances,
13+
const long* __restrict__ batch, const long* __restrict__ instance_pointnum,
14+
float* proposals_iou)
1215
{
1316
for (int proposal_id = blockIdx.x; proposal_id < nProposal; proposal_id += gridDim.x)
1417
{
1518
int start = proposals_offset[proposal_id];
1619
int end = proposals_offset[proposal_id + 1];
20+
int sampleIdx = batch[proposals_idx[start]];
21+
int sampleNInstances = nInstance[sampleIdx];
22+
int instanceOffset = offset_num_gt_instances[sampleIdx];
1723
int proposal_total = end - start;
18-
for (int instance_id = threadIdx.x; instance_id < nInstance; instance_id += blockDim.x)
24+
for (int instance_id = threadIdx.x; instance_id < sampleNInstances;
25+
instance_id += blockDim.x)
1926
{
20-
int instance_total = instance_pointnum[instance_id];
27+
int instance_total = instance_pointnum[instanceOffset + instance_id];
2128
int intersection = 0;
2229
for (int i = start; i < end; i++)
2330
{
@@ -27,7 +34,8 @@ __global__ void instance_iou_cuda_kernel(int nInstance, int nProposal, long* pro
2734
intersection += 1;
2835
}
2936
}
30-
proposals_iou[proposal_id * nInstance + instance_id] =
37+
38+
proposals_iou[instanceOffset + instance_id + proposal_id * total_gt_instances] =
3139
(float)intersection /
3240
((float)(proposal_total + instance_total - intersection) + 1e-5);
3341
}
@@ -39,12 +47,15 @@ __global__ void instance_iou_cuda_kernel(int nInstance, int nProposal, long* pro
3947
// input: instance_labels (N), long, 0~total_nInst-1, -100
4048
// input: instance_pointnum (total_nInst), int
4149
// output: proposals_iou (nProposal, total_nInst), float
42-
void instance_iou_kernel_wrapper(int nInstance, int nProposal, long* proposals_idx,
43-
long* proposals_offset, long* instance_labels,
44-
long* instance_pointnum, float* proposals_iou)
50+
void instance_iou_kernel_wrapper(long total_gt_instances, long max_gt_instances,
51+
const long* nInstance, int nProposal, const long* proposals_idx,
52+
const long* proposals_offset, const long* instance_labels,
53+
const long* offset_num_gt_instances, const long* batch,
54+
const long* instance_pointnum, float* proposals_iou)
4555
{
56+
auto stream = at::cuda::getCurrentCUDAStream();
4657
instance_iou_cuda_kernel<<<std::min(nProposal, THREADS * THREADS),
47-
std::min(nInstance, THREADS)>>>(nInstance, nProposal, proposals_idx,
48-
proposals_offset, instance_labels,
49-
instance_pointnum, proposals_iou);
58+
std::min(max_gt_instances, (long)THREADS), 0, stream>>>(
59+
total_gt_instances, nInstance, nProposal, proposals_idx, proposals_offset, instance_labels,
60+
offset_num_gt_instances, batch, instance_pointnum, proposals_iou);
5061
}

test/test_metrics.py

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,31 +13,44 @@
1313

1414

1515
class TestInstanceIou(unittest.TestCase):
16-
def test_simple(self):
16+
def test_simple(self, cuda=False):
1717
gt_instances = torch.tensor([1, 2, 1, 2, 2, 3, 0])
1818
proposed_instances = [
1919
torch.tensor([0, 2]), # 100% instance 1
2020
torch.tensor([1, 4]), # 2/3 of instance 2
2121
torch.tensor([3, 5]), # 1/3 of instance 2 and 1/1 of instance 3
2222
]
23-
23+
if cuda:
24+
proposed_instances = [c.cuda() for c in proposed_instances]
25+
gt_instances = gt_instances.cuda()
2426
ious = instance_iou(proposed_instances, gt_instances)
25-
torch.testing.assert_allclose(ious, torch.tensor([[1, 0, 0], [0, 2 / 3.0, 0], [0, 1.0 / 4.0, 1.0 / 2.0]]))
27+
torch.testing.assert_allclose(ious.cpu(), torch.tensor([[1, 0, 0], [0, 2 / 3.0, 0], [0, 1.0 / 4.0, 1.0 / 2.0]]))
2628

27-
@run_if_cuda
28-
def test_simple_cuda(self):
29-
gt_instances = torch.tensor([1, 2, 1, 2, 2, 3, 0]).cuda()
29+
def test_batch(self, cuda=False):
30+
gt_instances = torch.tensor([1, 2, 1, 2, 2, 3, 0])
31+
batch = torch.tensor([0, 0, 1, 1, 1, 1, 1])
3032
proposed_instances = [
31-
torch.tensor([0, 2]).cuda(), # 100% instance 1
32-
torch.tensor([1, 4]).cuda(), # 2/3 of instance 2
33-
torch.tensor([3, 5]).cuda(), # 1/3 of instance 2 and 1/1 of instance 3
33+
torch.tensor([0, 1]), # 50% instance 1, 50% instance 2 of sample 1
34+
torch.tensor([3, 4]), # 100% instance 2 of sample 2
35+
torch.tensor([5]), # 100% of instance 3 of sample 2
3436
]
35-
36-
ious = instance_iou(proposed_instances, gt_instances)
37+
if cuda:
38+
proposed_instances = [c.cuda() for c in proposed_instances]
39+
gt_instances = gt_instances.cuda()
40+
batch = batch.cuda()
41+
ious = instance_iou(proposed_instances, gt_instances, batch=batch)
3742
torch.testing.assert_allclose(
38-
ious, torch.tensor([[1, 0, 0], [0, 2 / 3.0, 0], [0, 1.0 / 4.0, 1.0 / 2.0]]).cuda(),
43+
ious.cpu(), torch.tensor([[0.5, 0.5, 0, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1],]),
3944
)
4045

46+
@run_if_cuda
47+
def test_simple_cuda(self):
48+
self.test_simple(cuda=True)
49+
50+
@run_if_cuda
51+
def test_batch_cuda(self):
52+
self.test_batch(cuda=True)
53+
4154
@run_if_cuda
4255
def test_same(self):
4356
gt_instances = torch.randint(0, 10, (1000,))

torch_points_kernels/metrics.py

Lines changed: 39 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import torch
2-
from typing import List
2+
from typing import List, Optional
33
import numpy as np
44
import numba
55

@@ -8,23 +8,31 @@
88

99

1010
@numba.jit(nopython=True, parallel=True)
11-
def _instance_iou_cpu(instance_idx, instance_offsets, gt_instances, gt_instance_sizes, num_gt_instances):
11+
def _instance_iou_cpu(
12+
instance_idx, instance_offsets, gt_instances, gt_instance_sizes, num_gt_instances: np.array, batch: np.array,
13+
):
1214
num_proposed_instances = len(instance_offsets) - 1
13-
iou = np.zeros((num_proposed_instances, num_gt_instances))
15+
iou = np.zeros((num_proposed_instances, num_gt_instances.sum()))
16+
offset_num_gt_instances = np.concatenate((np.array([0]), num_gt_instances.cumsum()))
1417
for proposed_instance in range(num_proposed_instances):
1518
instance = instance_idx[instance_offsets[proposed_instance] : instance_offsets[proposed_instance + 1]]
16-
for instance_id in numba.prange(1, num_gt_instances + 1):
19+
sample_idx = batch[instance[0]]
20+
gt_count_offset = offset_num_gt_instances[sample_idx]
21+
sample_instance_count = num_gt_instances[sample_idx]
22+
for instance_id in numba.prange(1, sample_instance_count + 1):
1723
intersection = 0
1824
for idx in instance:
1925
if gt_instances[idx] == instance_id:
2026
intersection += 1
21-
iou[proposed_instance, instance_id - 1] = intersection / float(
22-
len(instance) + gt_instance_sizes[instance_id - 1] - intersection
27+
iou[proposed_instance, gt_count_offset + instance_id - 1] = intersection / float(
28+
len(instance) + gt_instance_sizes[gt_count_offset + instance_id - 1] - intersection
2329
)
2430
return iou
2531

2632

27-
def instance_iou(instance_idx: List[torch.Tensor], gt_instances: torch.Tensor):
33+
def instance_iou(
34+
instance_idx: List[torch.Tensor], gt_instances: torch.Tensor, batch: Optional[torch.Tensor] = None,
35+
):
2836
""" Computes the IoU between each proposed instance in instance_idx and ground truth instances. Returns a
2937
tensor of shape [instance_idx.shape[0], num_instances] that contains the iou between the proposed instances and all gt instances
3038
Instance label 0 is reserved for non instance points
@@ -41,29 +49,48 @@ def instance_iou(instance_idx: List[torch.Tensor], gt_instances: torch.Tensor):
4149
-------
4250
ious: torch.Tensor[nb_proposals, nb_groundtruth]
4351
"""
52+
if batch is None:
53+
batch = torch.zeros_like(gt_instances)
54+
55+
# Gather number of gt instances per batch and size of those instances
4456
gt_instance_sizes = []
45-
num_gt_instances = torch.max(gt_instances).item()
46-
for instance_id in range(1, num_gt_instances + 1):
47-
gt_instance_sizes.append(torch.sum(gt_instances == instance_id))
57+
num_gt_instances = []
58+
batch_size = batch[-1] + 1
59+
for s in range(batch_size):
60+
batch_mask = batch == s
61+
sample_gt_instances = gt_instances[batch_mask]
62+
sample_num_gt_instances = torch.max(sample_gt_instances).item()
63+
num_gt_instances.append(sample_num_gt_instances)
64+
for instance_id in range(1, sample_num_gt_instances + 1):
65+
gt_instance_sizes.append(torch.sum(sample_gt_instances == instance_id))
4866
gt_instance_sizes = torch.stack(gt_instance_sizes)
67+
num_gt_instances = torch.tensor(num_gt_instances)
4968

69+
# Instance offset when flatten
5070
instance_offsets = [0]
5171
cum_offset = 0
5272
for instance in instance_idx:
5373
cum_offset += instance.shape[0]
5474
instance_offsets.append(cum_offset)
5575

76+
# Compute ious
5677
instance_idx = torch.cat(instance_idx)
5778
if gt_instances.is_cuda:
5879
return tpcuda.instance_iou_cuda(
59-
instance_idx, torch.tensor(instance_offsets).cuda(), gt_instances, gt_instance_sizes, num_gt_instances,
80+
instance_idx.cuda(),
81+
torch.tensor(instance_offsets).cuda(),
82+
gt_instances.cuda(),
83+
gt_instance_sizes.cuda(),
84+
num_gt_instances.cuda(),
85+
batch.cuda(),
6086
)
6187
else:
6288
res = _instance_iou_cpu(
6389
instance_idx.numpy(),
6490
np.asarray(instance_offsets),
6591
gt_instances.numpy(),
6692
gt_instance_sizes.numpy(),
67-
num_gt_instances,
93+
num_gt_instances.numpy(),
94+
batch.numpy(),
6895
)
6996
return torch.tensor(res).float()

0 commit comments

Comments
 (0)