|
| 1 | +#include "rw_cuda.h" |
| 2 | + |
| 3 | +#include <ATen/cuda/CUDAContext.h> |
| 4 | + |
| 5 | +#include "utils.cuh" |
| 6 | + |
// Number of threads per block used when launching kernels.
#define THREADS 1024
// Number of THREADS-sized blocks needed to cover N elements (ceiling
// division). Both the argument and the whole expansion are parenthesised so
// the macro stays correct when N is an expression or when BLOCKS(...) is used
// inside a larger expression.
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
| 9 | + |
// Performs one uniform random walk per thread over a graph stored in CSR form.
//
// Launch layout: 1-D grid of 1-D blocks. Thread `i` (flat global index)
// handles walk `i`; threads with `i >= numel` do nothing. No shared memory is
// used.
//
//   rowptr       CSR row offsets: the neighbours of node v are
//                col[rowptr[v]] .. col[rowptr[v + 1] - 1].
//   col          CSR neighbour indices.
//   start        start node of each walk (numel entries).
//   rand         pre-drawn samples read as rand[l * numel + i]; expected to
//                lie in [0, 1).
//   out          written as out[l * numel + i] for l = 0 .. walk_length, i.e.
//                (walk_length + 1) * numel entries; entry l = 0 is the start
//                node.
//   walk_length  number of steps taken by each walk.
//   numel        number of walks.
//
// A node without outgoing edges cannot be left, so the walk stays on that
// node for all remaining steps.
__global__ void uniform_random_walk_kernel(const int64_t *rowptr,
                                           const int64_t *col,
                                           const int64_t *start,
                                           const float *rand, int64_t *out,
                                           int64_t walk_length, int64_t numel) {
  // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
  // in 32-bit unsigned arithmetic and can wrap for very large launches.
  const int64_t thread_idx =
      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;

  if (thread_idx >= numel)
    return;

  int64_t cur = start[thread_idx];
  out[thread_idx] = cur;

  for (int64_t l = 0; l < walk_length; l++) {
    const int64_t row_start = rowptr[cur];
    const int64_t degree = rowptr[cur + 1] - row_start;

    // With degree == 0 there is no valid entry of `col` to read: indexing
    // col[row_start] would return a neighbour of a different node or read
    // past the end of the array. Keep `cur` unchanged in that case.
    if (degree > 0) {
      int64_t offset = int64_t(rand[l * numel + thread_idx] * degree);
      // The product is computed in float and may round up to `degree` itself
      // for large degrees; clamp so the index stays inside this node's row.
      if (offset >= degree)
        offset = degree - 1;
      cur = col[row_start + offset];
    }

    out[(l + 1) * numel + thread_idx] = cur;
  }
}
| 30 | + |
// Samples one uniform random walk of `walk_length` steps for every node in
// `start` on the graph given in CSR form by (`rowptr`, `col`).
//
//   rowptr, col, start  1-D CUDA tensors; they are accessed as int64 data.
//   walk_length         number of steps per walk.
//
// Returns a contiguous tensor of shape (start.size(0), walk_length + 1) with
// the options of `start`; column 0 holds the start nodes and column l the node
// reached after l steps.
//
// Note: this makes rowptr's device the current CUDA device and enqueues the
// kernel on the current CUDA stream.
torch::Tensor random_walk_cuda(torch::Tensor rowptr, torch::Tensor col,
                               torch::Tensor start, int64_t walk_length) {
  CHECK_CUDA(rowptr);
  CHECK_CUDA(col);
  CHECK_CUDA(start);
  // Surface a failing device switch here instead of as a confusing error in
  // some later, unrelated CUDA call.
  AT_CUDA_CHECK(cudaSetDevice(rowptr.get_device()));

  CHECK_INPUT(rowptr.dim() == 1);
  CHECK_INPUT(col.dim() == 1);
  CHECK_INPUT(start.dim() == 1);

  // The kernel indexes the raw pointers densely, so strided views (e.g. a
  // slice with a step) must be materialised first. This is a no-op for
  // tensors that are already contiguous.
  rowptr = rowptr.contiguous();
  col = col.contiguous();
  start = start.contiguous();

  const int64_t numel = start.numel();

  auto rand = torch::rand({walk_length, numel},
                          start.options().dtype(torch::kFloat));
  auto out = torch::full({walk_length + 1, numel}, -1, start.options());

  // A grid with zero blocks is an invalid launch configuration, so skip the
  // launch entirely when there are no walks to compute.
  if (numel > 0) {
    auto stream = at::cuda::getCurrentCUDAStream();
    uniform_random_walk_kernel<<<BLOCKS(numel), THREADS, 0, stream>>>(
        rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
        start.data_ptr<int64_t>(), rand.data_ptr<float>(),
        out.data_ptr<int64_t>(), walk_length, numel);
    // Kernel launches do not return a status; query it explicitly so a bad
    // launch is reported at this call site.
    AT_CUDA_CHECK(cudaGetLastError());
  }

  // The kernel writes step-major (walk_length + 1, numel); callers receive
  // one row per walk.
  return out.t().contiguous();
}
0 commit comments