Skip to content

Commit 95ea276

Browse files
Merge pull request #44 from Uakh/windows
Adding Windows support
2 parents 0220fa5 + 4f41421 commit 95ea276

File tree

4 files changed

+33
-33
lines changed

4 files changed

+33
-33
lines changed

cuda/src/ball_query.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
#include "utils.h"
44

55
void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, int nsample,
6-
const float* new_xyz, const float* xyz, long* idx,
6+
const float* new_xyz, const float* xyz, int64_t* idx,
77
float* dist_out);
88

9-
void query_ball_point_kernel_partial_wrapper(long batch_size, int size_x, int size_y, float radius,
9+
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
1010
int nsample, const float* x, const float* y,
11-
const long* batch_x, const long* batch_y,
12-
long* idx_out, float* dist_out);
11+
const int64_t* batch_x, const int64_t* batch_y,
12+
int64_t* idx_out, float* dist_out);
1313

1414
std::pair<at::Tensor, at::Tensor> ball_query_dense(at::Tensor new_xyz, at::Tensor xyz,
1515
const float radius, const int nsample)
@@ -29,7 +29,7 @@ std::pair<at::Tensor, at::Tensor> ball_query_dense(at::Tensor new_xyz, at::Tenso
2929

3030
query_ball_point_kernel_dense_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), radius,
3131
nsample, new_xyz.DATA_PTR<float>(), xyz.DATA_PTR<float>(),
32-
idx.DATA_PTR<long>(), dist.DATA_PTR<float>());
32+
idx.DATA_PTR<int64_t>(), dist.DATA_PTR<float>());
3333

3434
return std::make_pair(idx, dist);
3535
}
@@ -73,8 +73,8 @@ std::pair<at::Tensor, at::Tensor> ball_query_partial_dense(at::Tensor x, at::Ten
7373

7474
query_ball_point_kernel_partial_wrapper(batch_size, x.size(0), y.size(0), radius, nsample,
7575
x.DATA_PTR<float>(), y.DATA_PTR<float>(),
76-
batch_x.DATA_PTR<long>(), batch_y.DATA_PTR<long>(),
77-
idx.DATA_PTR<long>(), dist.DATA_PTR<float>());
76+
batch_x.DATA_PTR<int64_t>(), batch_y.DATA_PTR<int64_t>(),
77+
idx.DATA_PTR<int64_t>(), dist.DATA_PTR<float>());
7878

7979
return std::make_pair(idx, dist);
8080
}

cuda/src/ball_query_gpu.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
__global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius, int nsample,
1010
const float* __restrict__ new_xyz,
1111
const float* __restrict__ xyz,
12-
long* __restrict__ idx_out,
12+
int64_t* __restrict__ idx_out,
1313
float* __restrict__ dist_out)
1414
{
1515
int batch_index = blockIdx.x;
@@ -53,7 +53,7 @@ __global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius,
5353

5454
__global__ void query_ball_point_kernel_partial_dense(
5555
int size_x, int size_y, float radius, int nsample, const float* __restrict__ x,
56-
const float* __restrict__ y, const long* __restrict__ batch_x, const long* __restrict__ batch_y,
56+
const float* __restrict__ y, const int64_t* __restrict__ batch_x, const int64_t* __restrict__ batch_y,
5757
int64_t* __restrict__ idx_out, float* __restrict__ dist_out)
5858
{
5959
// taken from
@@ -93,7 +93,7 @@ __global__ void query_ball_point_kernel_partial_dense(
9393
}
9494

9595
void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, int nsample,
96-
const float* new_xyz, const float* xyz, long* idx,float* dist_out)
96+
const float* new_xyz, const float* xyz, int64_t* idx,float* dist_out)
9797
{
9898
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
9999
query_ball_point_kernel_dense<<<b, opt_n_threads(m), 0, stream>>>(b, n, m, radius, nsample,
@@ -102,9 +102,9 @@ void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, in
102102
CUDA_CHECK_ERRORS();
103103
}
104104

105-
void query_ball_point_kernel_partial_wrapper(long batch_size, int size_x, int size_y, float radius,
105+
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
106106
int nsample, const float* x, const float* y,
107-
const long* batch_x, const long* batch_y,
107+
const int64_t* batch_x, const int64_t* batch_y,
108108
int64_t* idx_out, float* dist_out)
109109
{
110110
query_ball_point_kernel_partial_dense<<<batch_size, TOTAL_THREADS_SPARSE>>>(

cuda/src/metrics.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
#include "compat.h"
33
#include "utils.h"
44

5-
void instance_iou_kernel_wrapper(long total_gt_instances, long max_gt_instances,
6-
const long* nInstance, int nProposal, const long* proposals_idx,
7-
const long* proposals_offset, const long* instance_labels,
8-
const long* offset_num_gt_instances, const long* batch,
9-
const long* instance_pointnum, float* proposals_iou);
5+
void instance_iou_kernel_wrapper(int64_t total_gt_instances, int64_t max_gt_instances,
6+
const int64_t* nInstance, int nProposal, const int64_t* proposals_idx,
7+
const int64_t* proposals_offset, const int64_t* instance_labels,
8+
const int64_t* offset_num_gt_instances, const int64_t* batch,
9+
const int64_t* instance_pointnum, float* proposals_iou);
1010

1111
at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offsets,
1212
at::Tensor gt_instances, at::Tensor gt_instance_sizes,
@@ -25,7 +25,7 @@ at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offset
2525
CHECK_CUDA(gt_instance_sizes);
2626

2727
cudaSetDevice(instance_idx.get_device());
28-
long num_proposed_instances = instance_offsets.size(0) - 1;
28+
int64_t num_proposed_instances = instance_offsets.size(0) - 1;
2929
auto total_gt_instances = (int64_t*)malloc(sizeof(int64_t));
3030
cudaMemcpy(total_gt_instances, num_gt_instances.sum().DATA_PTR<int64_t>(), sizeof(int64_t),
3131
cudaMemcpyDeviceToHost);
@@ -40,10 +40,10 @@ at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offset
4040
at::Tensor offset_num_gt_instances =
4141
at::cat({at::zeros(1, num_gt_instances.options()), num_gt_instances.cumsum(0)}, 0);
4242
instance_iou_kernel_wrapper(
43-
total_gt_instances[0], max_gt_instances[0], num_gt_instances.DATA_PTR<long>(),
44-
num_proposed_instances, instance_idx.DATA_PTR<long>(), instance_offsets.DATA_PTR<long>(),
45-
gt_instances.DATA_PTR<long>(), offset_num_gt_instances.DATA_PTR<long>(),
46-
batch.DATA_PTR<long>(), gt_instance_sizes.DATA_PTR<long>(), output.DATA_PTR<float>());
43+
total_gt_instances[0], max_gt_instances[0], num_gt_instances.DATA_PTR<int64_t>(),
44+
num_proposed_instances, instance_idx.DATA_PTR<int64_t>(), instance_offsets.DATA_PTR<int64_t>(),
45+
gt_instances.DATA_PTR<int64_t>(), offset_num_gt_instances.DATA_PTR<int64_t>(),
46+
batch.DATA_PTR<int64_t>(), gt_instance_sizes.DATA_PTR<int64_t>(), output.DATA_PTR<float>());
4747

4848
return output;
4949
}

cuda/src/metrics_gpu.cu

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
#define THREADS 512
88

99
__global__ void instance_iou_cuda_kernel(
10-
long total_gt_instances, const long* __restrict__ nInstance, int nProposal,
11-
const long* __restrict__ proposals_idx, const long* __restrict__ proposals_offset,
12-
const long* __restrict__ instance_labels, const long* __restrict__ offset_num_gt_instances,
13-
const long* __restrict__ batch, const long* __restrict__ instance_pointnum,
10+
int64_t total_gt_instances, const int64_t* __restrict__ nInstance, int nProposal,
11+
const int64_t* __restrict__ proposals_idx, const int64_t* __restrict__ proposals_offset,
12+
const int64_t* __restrict__ instance_labels, const int64_t* __restrict__ offset_num_gt_instances,
13+
const int64_t* __restrict__ batch, const int64_t* __restrict__ instance_pointnum,
1414
float* proposals_iou)
1515
{
1616
for (int proposal_id = blockIdx.x; proposal_id < nProposal; proposal_id += gridDim.x)
@@ -44,18 +44,18 @@ __global__ void instance_iou_cuda_kernel(
4444

4545
// input: proposals_idx (sumNPoint), int64_t
4646
// input: proposals_offset (nProposal + 1), int64_t
47-
// input: instance_labels (N), long, 0~total_nInst-1, -100
47+
// input: instance_labels (N), int64_t, 0~total_nInst-1, -100
4848
// input: instance_pointnum (total_nInst), int64_t
4949
// output: proposals_iou (nProposal, total_nInst), float
50-
void instance_iou_kernel_wrapper(long total_gt_instances, long max_gt_instances,
51-
const long* nInstance, int nProposal, const long* proposals_idx,
52-
const long* proposals_offset, const long* instance_labels,
53-
const long* offset_num_gt_instances, const long* batch,
54-
const long* instance_pointnum, float* proposals_iou)
50+
void instance_iou_kernel_wrapper(int64_t total_gt_instances, int64_t max_gt_instances,
51+
const int64_t* nInstance, int nProposal, const int64_t* proposals_idx,
52+
const int64_t* proposals_offset, const int64_t* instance_labels,
53+
const int64_t* offset_num_gt_instances, const int64_t* batch,
54+
const int64_t* instance_pointnum, float* proposals_iou)
5555
{
5656
auto stream = at::cuda::getCurrentCUDAStream();
5757
instance_iou_cuda_kernel<<<std::min(nProposal, THREADS * THREADS),
58-
std::min(max_gt_instances, (long)THREADS), 0, stream>>>(
58+
std::min(max_gt_instances, (int64_t)THREADS), 0, stream>>>(
5959
total_gt_instances, nInstance, nProposal, proposals_idx, proposals_offset, instance_labels,
6060
offset_num_gt_instances, batch, instance_pointnum, proposals_iou);
6161
}

0 commit comments

Comments
 (0)