Skip to content

Commit edc60a8

Browse files
author
david.roddy
committed
fixup! joining all contributions; created an actual_forward_project() that calls the kernels; we need to double check that the kernel can write into the sino but probably we need to dodevice_to_host(stir_sino,cuda_array_created_by_forward)
1 parent aec9adf commit edc60a8

File tree

1 file changed

+2
-96
lines changed

1 file changed

+2
-96
lines changed

src/recon_buildblock/SPECTGPU_projector/SPECTGPURotateAndGaussianInterpolate.cu

Lines changed: 2 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ rotateKernel_pull(
136136
};
137137

138138
// the following is the adjoint operation (push)
139-
<<<<<<< Updated upstream
140139
__global__ void
141140
rotateKernel_push(
142141
const float* __restrict__ in_im, float* __restrict__ out_im, int3 dim, float3 spacing, float3 origin, float angle_rad)
@@ -203,81 +202,13 @@ rotateKernel_push(
203202
// caulculate gaussian kernel
204203
float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma)) G
205204
+= g;
206-
=======
207-
__global__ void rotateKernel_push(const float* __restrict__ in_im,
208-
float* __restrict__ out_im,
209-
int3 dim,
210-
float3 spacing,
211-
float3 origin,
212-
float angle_rad)
213-
{
214-
// parallelise the operation across all image voxels
215-
int i = threadIdx.x + blockDim.x * blockIdx.x;
216-
int j = threadIdx.y + blockDim.y * blockIdx.y;
217-
int k = threadIdx.z + blockDim.z * blockIdx.z;
218-
219-
// check we are not outside the image
220-
if (i >= dim.x || j >= dim.y || k >= dim.z) return;
221-
222-
// get the 1-dimensional index
223-
int idx = i + (j * dim.x) + (k * dim.x * dim.y);
224-
225-
// move to centre of voxel and convert from voxel to image space
226-
float Xs_rel_x = (i + 0.5f) * spacing.x - origin.x;
227-
float Xs_rel_y = (j + 0.5f) * spacing.y - origin.y;
228-
float Xs_rel_z = (k + 0.5f) * spacing.z - origin.z;
229-
230-
// perform the rotation by 'angle_rad'
231-
float X_rot_x = (Xs_rel_x * cosf(angle_rad)) - (Xs_rel_y * sinf(angle_rad));
232-
float X_rot_y = (Xs_rel_x * sinf(angle_rad)) + (Xs_rel_y * cosf(angle_rad));
233-
float X_rot_z = Xs_rel_z;
234-
235-
// convert back to voxel space origin (corner of image) but keep image space dimensions
236-
float Xr_x = X_rot_x + origin.x;
237-
float Xr_y = X_rot_y + origin.y;
238-
float Xr_z = X_rot_z + origin.z;
239-
240-
// now we go fully back to voxel space (but now it's been rotated)
241-
float p_r = Xr_x / spacing.x;
242-
float q_r = Xr_y / spacing.y;
243-
float r_r = Xr_z / spacing.z;
244-
245-
// first loop is for finding the value of a gaussian kernel at
246-
// each nearest neighbour position and summing them all. This will allow normalisation
247-
// of each NN contribution in the next loop.
248-
float G = 0; // accumulator variable
249-
float sigma = 1; // gaussian kernel sigma
250-
251-
for (int dr = -1; dr <= 1; dr++) {
252-
int r = (int)roundf(r_r) + dr; // nearest neighbour z coordinate in rotated voxel space
253-
float x_r = (r * spacing.z) - origin.z; // converted to image space
254-
255-
for (int dq = -1; dq <= 1; dq++) {
256-
int q = (int)roundf(q_r) + dq;
257-
float x_q = (q * spacing.y) - origin.y;
258-
259-
for (int dp = -1; dp <= 1; dp++) {
260-
int p = (int)roundf(p_r) + dp;
261-
float x_p = (p * spacing.x) - origin.x;
262-
263-
// get distance between nearest neighbour and central voxel
264-
// both need to be in image space
265-
float delta_x = X_rot_x - x_p;
266-
float delta_y = X_rot_y - x_q;
267-
float delta_z = X_rot_z - x_r;
268-
269-
// caulculate gaussian kernel
270-
float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma))
271-
G += g;
272-
>>>>>>> Stashed changes
273205
};
274206
};
275207
};
276208

277-
// loop again but this time actually fetch the image values
278-
float accumulation = 0;
209+
// loop again but this time actually fetch the image values
210+
float accumulation = 0;
279211

280-
<<<<<<< Updated upstream
281212
for (int dr = -1; dr <= 1; dr++)
282213
{
283214
float r = dr + (int)roundf(r_r);
@@ -303,31 +234,6 @@ __global__ void rotateKernel_push(const float* __restrict__ in_im,
303234
// Pushing the weighted counts to NN
304235
float g = expf(-1. * ((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z)) / (2 * sigma * sigma));
305236
atomicAdd(&out_im[i_idx], in_im[idx] * g / G);
306-
=======
307-
for (int dr = -1; dr <= 1; dr++) {
308-
float r = dr + (int)roundf(r_r);
309-
float x_r = origin.z + (r * spacing.z);
310-
311-
for (int dq = -1; dq <= 1; dq++) {
312-
float q = dq + (int)roundf(q_r);
313-
float x_q = origin.y + (q * spacing.y);
314-
315-
for (int dp = -1; dp <= 1; dp++) {
316-
float p = dp + (int)roundf(p_r);
317-
float x_p = origin.x + (p * spacing.x);
318-
319-
float delta_x = Xr_x - x_p;
320-
float delta_y = Xr_y - x_q;
321-
float delta_z = Xr_z - x_r;
322-
323-
// get input image voxel index that we are at
324-
int i_idx = p + (q * dim.x) + (r * dim.x * dim.y);
325-
326-
// Pushing the weighted counts to NN
327-
float g = expf(-1. * ((delta_x*delta_x) + (delta_y*delta_y) + (delta_z*delta_z)) / (2*sigma*sigma));
328-
atomicAdd(&out_im[i_idx], in_im[idx]*g/G);
329-
330-
>>>>>>> Stashed changes
331237
};
332238
};
333239
};

0 commit comments

Comments
 (0)