|
3 | 3 | // |
4 | 4 | #include <fvdb/detail/GridBatchImpl.h> |
5 | 5 | #include <fvdb/detail/ops/SerializeEncode.h> |
6 | | -#include <fvdb/detail/utils/AccessorHelpers.cuh> |
7 | | -#include <fvdb/detail/utils/ForEachCPU.h> |
8 | 6 | #include <fvdb/detail/utils/HilbertCode.h> |
9 | 7 | #include <fvdb/detail/utils/MortonCode.h> |
10 | | -#include <fvdb/detail/utils/cuda/ForEachCUDA.cuh> |
11 | | -#include <fvdb/detail/utils/cuda/ForEachPrivateUse1.cuh> |
12 | | - |
13 | | -#include <c10/cuda/CUDAException.h> |
| 8 | +#include <fvdb/detail/utils/SimpleOpHelper.h> |
14 | 9 |
|
15 | 10 | #include <cuda_runtime.h> |
16 | 11 |
|
17 | | -#include <vector> |
18 | | - |
19 | 12 | namespace fvdb { |
20 | 13 | namespace detail { |
21 | 14 | namespace ops { |
22 | 15 |
|
23 | | -/// @brief Per-voxel callback which computes the space-filling curve code (Morton or Hilbert) for |
24 | | -/// each active voxel in a batch of grids |
25 | | -template <template <typename T, int32_t D> typename TorchAccessor> |
26 | | -__hostdev__ inline void |
27 | | -serializeEncodeVoxelCallback(int64_t batchIdx, |
28 | | - int64_t leafIdx, |
29 | | - int64_t voxelIdx, |
30 | | - GridBatchImpl::Accessor gridAccessor, |
31 | | - TorchAccessor<int64_t, 2> outMortonCodes, |
32 | | - const nanovdb::Coord &offset, |
33 | | - int order_type) { |
34 | | - const nanovdb::OnIndexGrid *grid = gridAccessor.grid(batchIdx); |
35 | | - const typename nanovdb::OnIndexGrid::LeafNodeType &leaf = |
36 | | - grid->tree().template getFirstNode<0>()[leafIdx]; |
37 | | - const int64_t baseOffset = gridAccessor.voxelOffset(batchIdx); |
38 | | - |
39 | | - const nanovdb::Coord &ijk = leaf.offsetToGlobalCoord(voxelIdx); |
40 | | - if (leaf.isActive(voxelIdx)) { |
41 | | - const int64_t idx = baseOffset + (int64_t)leaf.getValue(voxelIdx) - 1; |
namespace {

/// @brief Functor which computes the space-filling curve code (Morton or
/// Hilbert) for every active voxel in a batch of grids.
///
/// Drives the per-voxel iteration through BaseProcessor; the result for each
/// active voxel is a single int64 code written to the output accessor.
template <torch::DeviceType DeviceTag>
struct Processor : public BaseProcessor<DeviceTag, Processor<DeviceTag>, int64_t> {
    // Translation added to each voxel coordinate before encoding so the
    // encoded coordinates are non-negative (the curve codes operate on
    // unsigned coordinates).
    nanovdb::Coord offset = nanovdb::Coord{0, 0, 0};
    // Which space-filling curve variant to encode with.
    SpaceFillingCurveType order_type = SpaceFillingCurveType::ZOrder;

    /// @brief Per-active-voxel callback: shift ijk by `offset`, encode it with
    /// the selected curve, and store the code at `feature_idx` in the output.
    __hostdev__ void
    perActiveVoxel(nanovdb::Coord const &ijk, int64_t const feature_idx, auto out_accessor) const {
        // Apply the offset, then convert to unsigned for the curve encoders.
        auto const cx = static_cast<uint32_t>(ijk[0] + offset[0]);
        auto const cy = static_cast<uint32_t>(ijk[1] + offset[1]);
        auto const cz = static_cast<uint32_t>(ijk[2] + offset[2]);

        // Fallback of 0 is kept for an invalid order type (device code cannot
        // throw; an assert would be the alternative here).
        uint64_t code = 0;
        switch (order_type) {
        case SpaceFillingCurveType::ZOrder: // Regular z-order: xyz
            code = utils::morton(cx, cy, cz);
            break;
        case SpaceFillingCurveType::ZOrderTransposed: // Transposed z-order: zyx
            code = utils::morton(cz, cy, cx);
            break;
        case SpaceFillingCurveType::Hilbert: // Regular Hilbert curve: xyz
            code = utils::hilbert(cx, cy, cz);
            break;
        case SpaceFillingCurveType::HilbertTransposed: // Transposed Hilbert curve: zyx
            code = utils::hilbert(cz, cy, cx);
            break;
        default:
            break;
        }

        out_accessor[feature_idx] = static_cast<int64_t>(code);
    }
};

} // End anonymous namespace
106 | 59 |
|
107 | | - if constexpr (DeviceTag == torch::kCUDA) { |
108 | | - auto cb = [=] __device__(int64_t batchIdx, |
109 | | - int64_t leafIdx, |
110 | | - int64_t voxelIdx, |
111 | | - int64_t, |
112 | | - GridBatchImpl::Accessor gridAccessor) { |
113 | | - serializeEncodeVoxelCallback<TorchRAcc32>( |
114 | | - batchIdx, leafIdx, voxelIdx, gridAccessor, outCodesAcc, offset, order_type); |
115 | | - }; |
116 | | - forEachVoxelCUDA(1024, 1, gridBatch, cb); |
117 | | - } else if constexpr (DeviceTag == torch::kPrivateUse1) { |
118 | | - auto cb = [=] __device__(int64_t batchIdx, |
119 | | - int64_t leafIdx, |
120 | | - int64_t voxelIdx, |
121 | | - int64_t, |
122 | | - GridBatchImpl::Accessor gridAccessor) { |
123 | | - serializeEncodeVoxelCallback<TorchRAcc32>( |
124 | | - batchIdx, leafIdx, voxelIdx, gridAccessor, outCodesAcc, offset, order_type); |
125 | | - }; |
126 | | - forEachVoxelPrivateUse1(1, gridBatch, cb); |
127 | | - } else { |
128 | | - auto cb = [=](int64_t batchIdx, |
129 | | - int64_t leafIdx, |
130 | | - int64_t voxelIdx, |
131 | | - int64_t, |
132 | | - GridBatchImpl::Accessor gridAccessor) { |
133 | | - serializeEncodeVoxelCallback<TorchAcc>( |
134 | | - batchIdx, leafIdx, voxelIdx, gridAccessor, outCodesAcc, offset, order_type); |
135 | | - }; |
136 | | - forEachVoxelCPU(1, gridBatch, cb); |
137 | | - } |
138 | | -} |
139 | | - |
140 | | -/// @brief Get the space-filling curve codes for active voxels in a batch of grids |
141 | | -/// @tparam DeviceTag Which device to run on |
142 | | -/// @param gridBatch The batch of grids to get the space-filling curve codes for |
143 | | -/// @param order_type The type of space-filling curve to use for encoding |
144 | | -/// @param offset Offset to apply to voxel coordinates before encoding |
145 | | -/// @return A JaggedTensor of shape [B, -1, 1] of space-filling curve codes for active voxels |
/// @brief Compute space-filling curve codes for the active voxels of a batch
/// of grids.
/// @tparam DeviceTag Which device backend to run on
/// @param gridBatch The batch of grids whose active voxels are encoded
/// @param order_type The type of space-filling curve to use for encoding
/// @param offset Offset applied to voxel coordinates before encoding
/// @return A JaggedTensor of space-filling curve codes, one per active voxel
template <torch::DeviceType DeviceTag>
JaggedTensor
dispatchSerializeEncode(GridBatchImpl const &gridBatch,
                        SpaceFillingCurveType order_type,
                        nanovdb::Coord const &offset) {
    Processor<DeviceTag> encodeOp{.offset = offset, .order_type = order_type};
    return encodeOp.execute(gridBatch);
}
170 | 68 |
|
171 | | -template <> |
172 | | -JaggedTensor |
173 | | -dispatchSerializeEncode<torch::kCPU>(const GridBatchImpl &gridBatch, |
174 | | - SpaceFillingCurveType order_type, |
175 | | - const nanovdb::Coord &offset) { |
176 | | - return SerializeEncode<torch::kCPU>(gridBatch, order_type, offset); |
177 | | -} |
178 | | - |
179 | | -template <> |
180 | | -JaggedTensor |
181 | | -dispatchSerializeEncode<torch::kPrivateUse1>(const GridBatchImpl &gridBatch, |
182 | | - SpaceFillingCurveType order_type, |
183 | | - const nanovdb::Coord &offset) { |
184 | | - return SerializeEncode<torch::kPrivateUse1>(gridBatch, order_type, offset); |
185 | | -} |
// Explicit instantiations for every supported device backend.
template JaggedTensor dispatchSerializeEncode<torch::kCUDA>(GridBatchImpl const &,
                                                            SpaceFillingCurveType,
                                                            nanovdb::Coord const &);
template JaggedTensor dispatchSerializeEncode<torch::kCPU>(GridBatchImpl const &,
                                                           SpaceFillingCurveType,
                                                           nanovdb::Coord const &);
template JaggedTensor dispatchSerializeEncode<torch::kPrivateUse1>(GridBatchImpl const &,
                                                                   SpaceFillingCurveType,
                                                                   nanovdb::Coord const &);
186 | 78 |
|
187 | 79 | } // namespace ops |
188 | 80 | } // namespace detail |
|
0 commit comments