Skip to content

Commit b6d4e1d

Browse files
Fix various annoying bugs
1 parent 7994646 commit b6d4e1d

File tree

5 files changed

+153
-33
lines changed

5 files changed

+153
-33
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# 0.6.6
2+
3+
## Bug fix
4+
- Fixed bug in region growing related to batching
5+
- Fixed ball query for partial dense data on GPU, which was returning only the first point
6+
7+
18
# 0.6.5
29

310
## Additions

cuda/src/ball_query_gpu.cu

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ __global__ void query_ball_point_kernel_partial_dense(
5959
// taken from
6060
// https://github.com/rusty1s/pytorch_cluster/blob/master/cuda/radius_kernel.cu
6161
const ptrdiff_t batch_idx = blockIdx.x;
62-
const ptrdiff_t idx = threadIdx.x;
6362

6463
const ptrdiff_t start_idx_x = batch_x[batch_idx];
6564
const ptrdiff_t end_idx_x = batch_x[batch_idx + 1];
@@ -68,10 +67,10 @@ __global__ void query_ball_point_kernel_partial_dense(
6867
const ptrdiff_t end_idx_y = batch_y[batch_idx + 1];
6968
float radius2 = radius * radius;
7069

71-
for (ptrdiff_t n_x = start_idx_x + idx; n_x < end_idx_x; n_x += TOTAL_THREADS_SPARSE)
70+
for (ptrdiff_t n_y = start_idx_y + threadIdx.x; n_y < end_idx_y; n_y += blockDim.x)
7271
{
7372
int64_t count = 0;
74-
for (ptrdiff_t n_y = start_idx_y; n_y < end_idx_y; n_y++)
73+
for (ptrdiff_t n_x = start_idx_x; n_x < end_idx_x; n_x++)
7574
{
7675
float dist = 0;
7776
for (ptrdiff_t d = 0; d < 3; d++)

setup.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ def get_ext_modules():
2828
extra_compile_args += ["-DVERSION_GE_1_3"]
2929

3030
ext_src_root = "cuda"
31-
ext_sources = glob.glob("{}/src/*.cpp".format(ext_src_root)) + glob.glob("{}/src/*.cu".format(ext_src_root))
31+
ext_sources = glob.glob("{}/src/*.cpp".format(ext_src_root)) + glob.glob(
32+
"{}/src/*.cu".format(ext_src_root)
33+
)
3234

3335
ext_modules = []
3436
if CUDA_HOME:
@@ -37,7 +39,10 @@ def get_ext_modules():
3739
name="torch_points_kernels.points_cuda",
3840
sources=ext_sources,
3941
include_dirs=["{}/include".format(ext_src_root)],
40-
extra_compile_args={"cxx": extra_compile_args, "nvcc": extra_compile_args,},
42+
extra_compile_args={
43+
"cxx": extra_compile_args,
44+
"nvcc": extra_compile_args,
45+
},
4146
)
4247
)
4348

@@ -67,7 +72,7 @@ def get_cmdclass():
6772
requirements = ["torch>=1.1.0", "numba", "scikit-learn"]
6873

6974
url = "https://github.com/nicolas-chaulet/torch-points-kernels"
70-
__version__ = "0.6.5"
75+
__version__ = "0.6.6"
7176
setup(
7277
name="torch-points-kernels",
7378
version=__version__,
@@ -81,5 +86,8 @@ def get_cmdclass():
8186
cmdclass=get_cmdclass(),
8287
long_description=long_description,
8388
long_description_content_type="text/markdown",
84-
classifiers=["Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License",],
89+
classifiers=[
90+
"Programming Language :: Python :: 3",
91+
"License :: OSI Approved :: MIT License",
92+
],
8593
)

test/test_ballquerry.py

Lines changed: 121 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,34 @@
1616
class TestBall(unittest.TestCase):
1717
@run_if_cuda
1818
def test_simple_gpu(self):
19-
a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]], [[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float).cuda()
19+
a = (
20+
torch.tensor(
21+
[[[0, 0, 0], [1, 0, 0], [2, 0, 0]], [[0, 0, 0], [1, 0, 0], [2, 0, 0]]]
22+
)
23+
.to(torch.float)
24+
.cuda()
25+
)
2026
b = torch.tensor([[[0, 0, 0]], [[3, 0, 0]]]).to(torch.float).cuda()
2127
idx, dist = ball_query(1.01, 2, a, b)
2228
torch.testing.assert_allclose(idx.cpu(), torch.tensor([[[0, 1]], [[2, 2]]]))
23-
torch.testing.assert_allclose(dist.cpu(), torch.tensor([[[0, 1]], [[1, -1]]]).float())
29+
torch.testing.assert_allclose(
30+
dist.cpu(), torch.tensor([[[0, 1]], [[1, -1]]]).float()
31+
)
2432

2533
def test_simple_cpu(self):
26-
a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]], [[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float)
34+
a = torch.tensor(
35+
[[[0, 0, 0], [1, 0, 0], [2, 0, 0]], [[0, 0, 0], [1, 0, 0], [2, 0, 0]]]
36+
).to(torch.float)
2737
b = torch.tensor([[[0, 0, 0]], [[3, 0, 0]]]).to(torch.float)
2838
idx, dist = ball_query(1.01, 2, a, b, sort=True)
2939
torch.testing.assert_allclose(idx, torch.tensor([[[0, 1]], [[2, 2]]]))
3040
torch.testing.assert_allclose(dist, torch.tensor([[[0, 1]], [[1, -1]]]).float())
3141

3242
a = torch.tensor([[[0, 0, 0], [1, 0, 0], [1, 1, 0]]]).to(torch.float)
3343
idx, dist = ball_query(1.01, 3, a, a, sort=True)
34-
torch.testing.assert_allclose(idx, torch.tensor([[[0, 1, 0], [1, 0, 2], [2, 1, 2]]]))
44+
torch.testing.assert_allclose(
45+
idx, torch.tensor([[[0, 1, 0], [1, 0, 2], [2, 1, 2]]])
46+
)
3547

3648
@run_if_cuda
3749
def test_larger_gpu(self):
@@ -61,33 +73,40 @@ def test_cpu_gpu_equality(self):
6173
class TestBallPartial(unittest.TestCase):
6274
@run_if_cuda
6375
def test_simple_gpu(self):
64-
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float).cuda()
76+
x = (
77+
torch.tensor([[10, 0, 0], [0.1, 0, 0], [0.2, 0, 0], [0.1, 0, 0]])
78+
.to(torch.float)
79+
.cuda()
80+
)
6581
y = torch.tensor([[0, 0, 0]]).to(torch.float).cuda()
66-
batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
82+
batch_x = torch.from_numpy(np.asarray([0, 0, 0, 1])).long().cuda()
6783
batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
6884

69-
batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
70-
batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
71-
72-
idx, dist2 = ball_query(1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
85+
idx, dist2 = ball_query(
86+
0.2, 4, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
87+
)
7388

7489
idx = idx.detach().cpu().numpy()
7590
dist2 = dist2.detach().cpu().numpy()
7691

77-
idx_answer = np.asarray([[1, -1]])
78-
dist2_answer = np.asarray([[0.0100, -1.0000]]).astype(np.float32)
92+
idx_answer = np.asarray([[1, 2, -1, -1]])
93+
dist2_answer = np.asarray([[0.0100,0.04,-1,-1]]).astype(np.float32)
7994

8095
npt.assert_array_almost_equal(idx, idx_answer)
8196
npt.assert_array_almost_equal(dist2, dist2_answer)
8297

8398
def test_simple_cpu(self):
84-
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [10.1, 0, 0]]).to(torch.float)
99+
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [10.1, 0, 0]]).to(
100+
torch.float
101+
)
85102
y = torch.tensor([[0, 0, 0]]).to(torch.float)
86103

87104
batch_x = torch.from_numpy(np.asarray([0, 0, 0, 0])).long()
88105
batch_y = torch.from_numpy(np.asarray([0])).long()
89106

90-
idx, dist2 = ball_query(1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
107+
idx, dist2 = ball_query(
108+
1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
109+
)
91110

92111
idx = idx.detach().cpu().numpy()
93112
dist2 = dist2.detach().cpu().numpy()
@@ -98,30 +117,75 @@ def test_simple_cpu(self):
98117
npt.assert_array_almost_equal(idx, idx_answer)
99118
npt.assert_array_almost_equal(dist2, dist2_answer)
100119

101-
102120
def test_breaks(self):
103-
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [10.1, 0, 0]]).to(torch.float)
121+
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [10.1, 0, 0]]).to(
122+
torch.float
123+
)
104124
y = torch.tensor([[0, 0, 0]]).to(torch.float)
105125

106126
batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long()
107127
batch_y = torch.from_numpy(np.asarray([0])).long()
108-
128+
109129
with self.assertRaises(RuntimeError):
110-
idx, dist2 = ball_query(1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
130+
idx, dist2 = ball_query(
131+
1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
132+
)
111133

112-
def test_random_cpu(self):
134+
def test_random_cpu(self, cuda=False):
113135
a = torch.randn(100, 3).to(torch.float)
114136
b = torch.randn(50, 3).to(torch.float)
115-
batch_a = torch.tensor([0 for i in range(a.shape[0] // 2)] + [1 for i in range(a.shape[0] // 2, a.shape[0])])
116-
batch_b = torch.tensor([0 for i in range(b.shape[0] // 2)] + [1 for i in range(b.shape[0] // 2, b.shape[0])])
137+
batch_a = torch.tensor(
138+
[0 for i in range(a.shape[0] // 2)]
139+
+ [1 for i in range(a.shape[0] // 2, a.shape[0])]
140+
)
141+
batch_b = torch.tensor(
142+
[0 for i in range(b.shape[0] // 2)]
143+
+ [1 for i in range(b.shape[0] // 2, b.shape[0])]
144+
)
117145
R = 1
118146

119-
idx, dist = ball_query(R, 15, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True)
120-
idx1, dist = ball_query(R, 15, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True)
147+
idx, dist = ball_query(
148+
R,
149+
15,
150+
a,
151+
b,
152+
mode="PARTIAL_DENSE",
153+
batch_x=batch_a,
154+
batch_y=batch_b,
155+
sort=True,
156+
)
157+
idx1, dist = ball_query(
158+
R,
159+
15,
160+
a,
161+
b,
162+
mode="PARTIAL_DENSE",
163+
batch_x=batch_a,
164+
batch_y=batch_b,
165+
sort=True,
166+
)
121167
torch.testing.assert_allclose(idx1, idx)
122168
with self.assertRaises(AssertionError):
123-
idx, dist = ball_query(R, 15, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=False)
124-
idx1, dist = ball_query(R, 15, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=False)
169+
idx, dist = ball_query(
170+
R,
171+
15,
172+
a,
173+
b,
174+
mode="PARTIAL_DENSE",
175+
batch_x=batch_a,
176+
batch_y=batch_b,
177+
sort=False,
178+
)
179+
idx1, dist = ball_query(
180+
R,
181+
15,
182+
a,
183+
b,
184+
mode="PARTIAL_DENSE",
185+
batch_x=batch_a,
186+
batch_y=batch_b,
187+
sort=False,
188+
)
125189
torch.testing.assert_allclose(idx1, idx)
126190

127191
self.assertEqual(idx.shape[0], b.shape[0])
@@ -136,6 +200,38 @@ def test_random_cpu(self):
136200
if p >= 0 and p < len(batch_a):
137201
assert p in idx3_sk[i]
138202

203+
def test_random_gpu(self):
204+
a = torch.randn(100, 3).to(torch.float).cuda()
205+
b = torch.randn(50, 3).to(torch.float).cuda()
206+
batch_a = torch.tensor(
207+
[0 for i in range(a.shape[0] // 2)]
208+
+ [1 for i in range(a.shape[0] // 2, a.shape[0])]
209+
).cuda()
210+
batch_b = torch.tensor(
211+
[0 for i in range(b.shape[0] // 2)]
212+
+ [1 for i in range(b.shape[0] // 2, b.shape[0])]
213+
).cuda()
214+
R = 1
215+
216+
idx, dist = ball_query(
217+
R,
218+
15,
219+
a,
220+
b,
221+
mode="PARTIAL_DENSE",
222+
batch_x=batch_a,
223+
batch_y=batch_b,
224+
sort=False,
225+
)
226+
227+
# Comparison to see if we have the same result
228+
tree = KDTree(a.cpu().detach().numpy())
229+
idx3_sk = tree.query_radius(b.cpu().detach().numpy(), r=R)
230+
i = np.random.randint(len(batch_b))
231+
for p in idx[i].cpu().detach().numpy():
232+
if p >= 0 and p < len(batch_a):
233+
assert p in idx3_sk[i]
234+
139235

140236
if __name__ == "__main__":
141237
unittest.main()

torch_points_kernels/cluster.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,19 @@ def region_grow(
8686
# Build clusters for a given label (ignore other points)
8787
label_mask = labels == l
8888
local_ind = ind[label_mask]
89+
90+
# Remap batch indices to a contiguous sequence (0, 1, 2, ...)
91+
label_batch = batch[label_mask]
92+
unique_in_batch = torch.unique(label_batch)
93+
remaped_batch = torch.empty_like(label_batch)
94+
for new, old in enumerate(unique_in_batch):
95+
mask = label_batch == old
96+
remaped_batch[mask] = new
97+
98+
# Cluster
8999
label_clusters = grow_proximity(
90100
pos[label_mask, :],
91-
batch[label_mask],
101+
remaped_batch,
92102
nsample=nsample,
93103
radius=radius,
94104
min_cluster_size=min_cluster_size,

0 commit comments

Comments
 (0)