|
6 | 6 | \ingroup projection |
7 | 7 | \ingroup SPECTGPU |
8 | 8 |
|
9 | | - \brief implementations for cuda kernel for rotating projector with gaussian interpolation |
| 9 | + \brief implementations for cuda kernel for rotating projector with gaussian interpolation |
10 | 10 | a la Wallis et al 1997,TMI, doi: 10.1109/42.552061. |
11 | 11 |
|
12 | 12 | \author Daniel Deidda |
|
26 | 26 | #include <cuda_runtime.h> |
27 | 27 | #include <numeric> |
28 | 28 |
|
29 | | -//the following is a pull operation |
30 | | -__global__ void rotateKernel_pull(const float* __restrict__ in_im, |
31 | | - float* __restrict__ out_im, |
32 | | - int3 dim, |
33 | | - float3 spacing, |
34 | | - float3 origin, |
35 | | - float angle_rad) |
36 | | - { |
| 29 | +// the following is a pull operation |
| 30 | +__global__ void |
| 31 | +rotateKernel_pull( |
| 32 | + const float* __restrict__ in_im, float* __restrict__ out_im, int3 dim, float3 spacing, float3 origin, float angle_rad) |
| 33 | +{ |
37 | 34 | // parallelise the operation across all image voxels |
38 | 35 | int i = threadIdx.x + blockDim.x * blockIdx.x; |
39 | 36 | int j = threadIdx.y + blockDim.y * blockIdx.y; |
40 | 37 | int k = threadIdx.z + blockDim.z * blockIdx.z; |
41 | 38 |
|
42 | 39 | // check we are not outside the image |
43 | | - if (i >= dim.x || j >= dim.y || k >= dim.z) return; |
| 40 | + if (i >= dim.x || j >= dim.y || k >= dim.z) |
| 41 | + return; |
44 | 42 |
|
45 | 43 | // get the 1-dimensional index |
46 | 44 | int idx = i + (j * dim.x) + (k * dim.x * dim.y); |
@@ -68,86 +66,88 @@ __global__ void rotateKernel_pull(const float* __restrict__ in_im, |
68 | 66 | // first loop is for finding the value of a gaussian kernel at |
69 | 67 | // each nearest neighbour position and summing them all. This will allow normalisation |
70 | 68 | // of each NN contribution in the next loop. |
71 | | - float G = 0; // accumulator variable |
72 | | - float sigma = 1; // gaussian kernel sigma |
73 | | - |
74 | | - for (int dr = -1; dr <= 1; dr++) { |
75 | | - int r = (int)roundf(r_r) + dr; // nearest neighbour z coordinate in rotated voxel space |
76 | | - float x_r = (r * spacing.z) - origin.z; // converted to image space |
77 | | - |
78 | | - for (int dq = -1; dq <= 1; dq++) { |
79 | | - int q = (int)roundf(q_r) + dq; |
80 | | - float x_q = (q * spacing.y) - origin.y; |
81 | | - |
82 | | - for (int dp = -1; dp <= 1; dp++) { |
83 | | - int p = (int)roundf(p_r) + dp; |
84 | | - float x_p = (p * spacing.x) - origin.x; |
85 | | - |
86 | | - // get distance between nearest neighbour and central voxel |
87 | | - // both need to be in image space |
88 | | - float delta_x = X_rot_x - x_p; |
89 | | - float delta_y = X_rot_y - x_q; |
90 | | - float delta_z = X_rot_z - x_r; |
91 | | - |
92 | | - // caulculate gaussian kernel |
93 | | - float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma)) |
94 | | - G += g; |
95 | | - }; |
| 69 | + float G = 0; // accumulator variable |
| 70 | + float sigma = 1; // gaussian kernel sigma |
| 71 | + |
| 72 | + for (int dr = -1; dr <= 1; dr++) |
| 73 | + { |
| 74 | + int r = (int)roundf(r_r) + dr; // nearest neighbour z coordinate in rotated voxel space |
| 75 | + float x_r = (r * spacing.z) - origin.z; // converted to image space |
| 76 | + |
| 77 | + for (int dq = -1; dq <= 1; dq++) |
| 78 | + { |
| 79 | + int q = (int)roundf(q_r) + dq; |
| 80 | + float x_q = (q * spacing.y) - origin.y; |
| 81 | + |
| 82 | + for (int dp = -1; dp <= 1; dp++) |
| 83 | + { |
| 84 | + int p = (int)roundf(p_r) + dp; |
| 85 | + float x_p = (p * spacing.x) - origin.x; |
| 86 | + |
| 87 | + // get distance between nearest neighbour and central voxel |
| 88 | + // both need to be in image space |
| 89 | + float delta_x = X_rot_x - x_p; |
| 90 | + float delta_y = X_rot_y - x_q; |
| 91 | + float delta_z = X_rot_z - x_r; |
| 92 | + |
| 93 | + // calculate gaussian kernel |
| 94 | + float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma)); |
| 95 | + G += g; |
| 96 | + }; |
| 97 | + }; |
96 | 98 | }; |
97 | | - }; |
98 | 99 |
|
99 | 100 | // loop again but this time actually fetch the image values |
100 | 101 | float accumulation = 0; |
101 | 102 |
|
102 | | - for (int dr = -1; dr <= 1; dr++) { |
103 | | - float r = dr + (int)roundf(r_r); |
104 | | - float x_r = origin.z + (r * spacing.z); |
| 103 | + for (int dr = -1; dr <= 1; dr++) |
| 104 | + { |
| 105 | + float r = dr + (int)roundf(r_r); |
| 106 | + float x_r = origin.z + (r * spacing.z); |
105 | 107 |
|
106 | | - for (int dq = -1; dq <= 1; dq++) { |
107 | | - float q = dq + (int)roundf(q_r); |
108 | | - float x_q = origin.y + (q * spacing.y); |
| 108 | + for (int dq = -1; dq <= 1; dq++) |
| 109 | + { |
| 110 | + float q = dq + (int)roundf(q_r); |
| 111 | + float x_q = origin.y + (q * spacing.y); |
109 | 112 |
|
110 | | - for (int dp = -1; dp <= 1; dp++) { |
111 | | - float p = dp + (int)roundf(p_r); |
112 | | - float x_p = origin.x + (p * spacing.x); |
| 113 | + for (int dp = -1; dp <= 1; dp++) |
| 114 | + { |
| 115 | + float p = dp + (int)roundf(p_r); |
| 116 | + float x_p = origin.x + (p * spacing.x); |
113 | 117 |
|
114 | | - float delta_x = Xr_x - x_p; |
115 | | - float delta_y = Xr_y - x_q; |
116 | | - float delta_z = Xr_z - x_r; |
| 118 | + float delta_x = Xr_x - x_p; |
| 119 | + float delta_y = Xr_y - x_q; |
| 120 | + float delta_z = Xr_z - x_r; |
117 | 121 |
|
118 | | - // get input image voxel index that we are at |
119 | | - int i_idx = p + (q * dim.x) + (r * dim.x * dim.y); |
| 122 | + // get input image voxel index that we are at |
| 123 | + int i_idx = p + (q * dim.x) + (r * dim.x * dim.y); |
120 | 124 |
|
121 | | - // calculate gaussian weighting for this voxel |
122 | | - float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma)); |
| 125 | + // calculate gaussian weighting for this voxel |
| 126 | + float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma)); |
123 | 127 |
|
124 | | - // record the weighted value from this voxel |
125 | | - accumulation += in_im[i_idx] * g / G; |
126 | | - |
127 | | - }; |
| 128 | + // record the weighted value from this voxel |
| 129 | + accumulation += in_im[i_idx] * g / G; |
| 130 | + }; |
| 131 | + }; |
128 | 132 | }; |
129 | | - }; |
130 | 133 |
|
131 | 134 | // assign the pulled voxel values to a single voxel in the output image |
132 | 135 | out_im[idx] = accumulation; |
133 | | - |
134 | 136 | }; |
135 | 137 |
|
136 | 138 | // the following is the adjoint operation (push) |
137 | | -__global__ void rotateKernel_push(const float* __restrict__ in_im, |
138 | | - float* __restrict__ out_im, |
139 | | - int3 dim, |
140 | | - float3 spacing, |
141 | | - float3 origin, |
142 | | - float angle_rad) |
143 | | - { |
144 | | - // parallelise the operation across all image voxels |
| 139 | +__global__ void |
| 140 | +rotateKernel_push( |
| 141 | + const float* __restrict__ in_im, float* __restrict__ out_im, int3 dim, float3 spacing, float3 origin, float angle_rad) |
| 142 | +{ |
| 143 | + // parallelise the operation across all image voxels |
145 | 144 | int i = threadIdx.x + blockDim.x * blockIdx.x; |
146 | 145 | int j = threadIdx.y + blockDim.y * blockIdx.y; |
147 | 146 | int k = threadIdx.z + blockDim.z * blockIdx.z; |
148 | 147 |
|
149 | 148 | // check we are not outside the image |
150 | | - if (i >= dim.x || j >= dim.y || k >= dim.z) return; |
| 149 | + if (i >= dim.x || j >= dim.y || k >= dim.z) |
| 150 | + return; |
151 | 151 |
|
152 | 152 | // get the 1-dimensional index |
153 | 153 | int idx = i + (j * dim.x) + (k * dim.x * dim.y); |
@@ -175,61 +175,66 @@ __global__ void rotateKernel_push(const float* __restrict__ in_im, |
175 | 175 | // first loop is for finding the value of a gaussian kernel at |
176 | 176 | // each nearest neighbour position and summing them all. This will allow normalisation |
177 | 177 | // of each NN contribution in the next loop. |
178 | | - float G = 0; // accumulator variable |
179 | | - float sigma = 1; // gaussian kernel sigma |
180 | | - |
181 | | - for (int dr = -1; dr <= 1; dr++) { |
182 | | - int r = (int)roundf(r_r) + dr; // nearest neighbour z coordinate in rotated voxel space |
183 | | - float x_r = (r * spacing.z) - origin.z; // converted to image space |
184 | | - |
185 | | - for (int dq = -1; dq <= 1; dq++) { |
186 | | - int q = (int)roundf(q_r) + dq; |
187 | | - float x_q = (q * spacing.y) - origin.y; |
188 | | - |
189 | | - for (int dp = -1; dp <= 1; dp++) { |
190 | | - int p = (int)roundf(p_r) + dp; |
191 | | - float x_p = (p * spacing.x) - origin.x; |
192 | | - |
193 | | - // get distance between nearest neighbour and central voxel |
194 | | - // both need to be in image space |
195 | | - float delta_x = X_rot_x - x_p; |
196 | | - float delta_y = X_rot_y - x_q; |
197 | | - float delta_z = X_rot_z - x_r; |
198 | | - |
199 | | - // caulculate gaussian kernel |
200 | | - float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma)) |
201 | | - G += g; |
202 | | - }; |
| 178 | + float G = 0; // accumulator variable |
| 179 | + float sigma = 1; // gaussian kernel sigma |
| 180 | + |
| 181 | + for (int dr = -1; dr <= 1; dr++) |
| 182 | + { |
| 183 | + int r = (int)roundf(r_r) + dr; // nearest neighbour z coordinate in rotated voxel space |
| 184 | + float x_r = (r * spacing.z) - origin.z; // converted to image space |
| 185 | + |
| 186 | + for (int dq = -1; dq <= 1; dq++) |
| 187 | + { |
| 188 | + int q = (int)roundf(q_r) + dq; |
| 189 | + float x_q = (q * spacing.y) - origin.y; |
| 190 | + |
| 191 | + for (int dp = -1; dp <= 1; dp++) |
| 192 | + { |
| 193 | + int p = (int)roundf(p_r) + dp; |
| 194 | + float x_p = (p * spacing.x) - origin.x; |
| 195 | + |
| 196 | + // get distance between nearest neighbour and central voxel |
| 197 | + // both need to be in image space |
| 198 | + float delta_x = X_rot_x - x_p; |
| 199 | + float delta_y = X_rot_y - x_q; |
| 200 | + float delta_z = X_rot_z - x_r; |
| 201 | + |
| 202 | + // calculate gaussian kernel |
| 203 | + float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma)); |
| 204 | + G += g; |
| 205 | + }; |
| 206 | + }; |
203 | 207 | }; |
204 | | - }; |
205 | 208 |
|
206 | 209 | // loop again but this time actually fetch the image values |
207 | 210 | float accumulation = 0; |
208 | 211 |
|
209 | | - for (int dr = -1; dr <= 1; dr++) { |
210 | | - float r = dr + (int)roundf(r_r); |
211 | | - float x_r = origin.z + (r * spacing.z); |
212 | | - |
213 | | - for (int dq = -1; dq <= 1; dq++) { |
214 | | - float q = dq + (int)roundf(q_r); |
215 | | - float x_q = origin.y + (q * spacing.y); |
216 | | - |
217 | | - for (int dp = -1; dp <= 1; dp++) { |
218 | | - float p = dp + (int)roundf(p_r); |
219 | | - float x_p = origin.x + (p * spacing.x); |
220 | | - |
221 | | - float delta_x = Xr_x - x_p; |
222 | | - float delta_y = Xr_y - x_q; |
223 | | - float delta_z = Xr_z - x_r; |
224 | | - |
225 | | - // get input image voxel index that we are at |
226 | | - int i_idx = p + (q * dim.x) + (r * dim.x * dim.y); |
227 | | - |
228 | | - // Pushing the weighted counts to NN |
229 | | - float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma)); |
230 | | - atomicAdd(&out_im[i_idx], in_im[idx]*g/G); |
231 | | - |
232 | | - }; |
| 212 | + for (int dr = -1; dr <= 1; dr++) |
| 213 | + { |
| 214 | + float r = dr + (int)roundf(r_r); |
| 215 | + float x_r = origin.z + (r * spacing.z); |
| 216 | + |
| 217 | + for (int dq = -1; dq <= 1; dq++) |
| 218 | + { |
| 219 | + float q = dq + (int)roundf(q_r); |
| 220 | + float x_q = origin.y + (q * spacing.y); |
| 221 | + |
| 222 | + for (int dp = -1; dp <= 1; dp++) |
| 223 | + { |
| 224 | + float p = dp + (int)roundf(p_r); |
| 225 | + float x_p = origin.x + (p * spacing.x); |
| 226 | + |
| 227 | + float delta_x = Xr_x - x_p; |
| 228 | + float delta_y = Xr_y - x_q; |
| 229 | + float delta_z = Xr_z - x_r; |
| 230 | + |
| 231 | + // get input image voxel index that we are at |
| 232 | + int i_idx = p + (q * dim.x) + (r * dim.x * dim.y); |
| 233 | + |
| 234 | + // Pushing the weighted counts to NN |
| 235 | + float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma)); |
| 236 | + atomicAdd(&out_im[i_idx], in_im[idx] * g / G); |
| 237 | + }; |
| 238 | + }; |
233 | 239 | }; |
234 | | - }; |
235 | 240 | } |
0 commit comments