Skip to content

Commit aec9adf

Browse files
committed
joining all contributions; created an actual_forward_project() that calls the kernels; we need to double check that the kernel can write into the sino but probably we need to dodevice_to_host(stir_sino,cuda_array_created_by_forward)
1 parent 8dfbdd1 commit aec9adf

File tree

7 files changed

+272
-162
lines changed

7 files changed

+272
-162
lines changed

src/include/stir/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ class BackProjectorByBinSPECTGPU : public RegisteredParsingObject<BackProjectorB
8282
const int min_tangential_pos_num,
8383
const int max_tangential_pos_num);
8484

85+
virtual void actual_back_project(DiscretisedDensity<3, float>& stir_image,
86+
const RelatedViewgrams<float>&,
87+
const int min_axial_pos_num,
88+
const int max_axial_pos_num,
89+
const int min_tangential_pos_num,
90+
const int max_tangential_pos_num);
91+
8592
private:
8693
shared_ptr<DataSymmetriesForViewSegmentNumbers> _symmetries_sptr;
8794
SPECTGPUHelper _helper;

src/include/stir/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,19 @@ class ForwardProjectorByBinSPECTGPU : public RegisteredParsingObject<ForwardProj
9494
const int min_tangential_pos_num,
9595
const int max_tangential_pos_num);
9696

97+
protected:
98+
struct cppdim3
99+
{
100+
int x;
101+
int y;
102+
int z;
103+
};
104+
105+
int z_dim, y_dim, x_dim;
106+
cppdim3 block_dim;
107+
cppdim3 grid_dim;
108+
97109
private:
98-
shared_ptr<DataSymmetriesForViewSegmentNumbers> _symmetries_sptr;
99110
shared_ptr<ProjDataInMemory> _projected_data_sptr;
100111
SPECTGPUHelper _helper;
101112
int _cuda_device;

src/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.cxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,11 @@ BackProjectorByBinSPECTGPU::start_accumulating_in_new_target()
120120
}
121121

122122
void
123-
BackProjectorByBinSPECTGPU::actual_back_project(
123+
BackProjectorByBinSPECTGPU::actual_back_project(DiscretisedDensity<3, float> &stir_image,
124124
const RelatedViewgrams<float>& related_viewgrams, const int, const int, const int, const int)
125125
{
126-
for (stir::RelatedViewgrams<float>::const_iterator iter = related_viewgrams.begin(); iter != related_viewgrams.end(); ++iter)
127-
_helper.convert_viewgram_stir_to_SPECTGPU(_np_sino_w_gaps, *iter);
126+
127+
// call the kernels for backward
128128
}
129129

130130
END_NAMESPACE_STIR

src/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.cxx

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,35 @@ ForwardProjectorByBinSPECTGPU::set_up(const shared_ptr<const ProjDataInfo>& proj
6161
{
6262
ForwardProjectorByBin::set_up(proj_data_info_sptr, density_info_sptr);
6363
check(*proj_data_info_sptr, *_density_sptr);
64-
_symmetries_sptr.reset(new TrivialDataSymmetriesForBins(proj_data_info_sptr));
64+
65+
auto& target_cast = dynamic_cast<const VoxelsOnCartesianGrid<elemT>&>(*target_sptr);
66+
auto sizes = target_cast.get_lengths();
67+
68+
this->z_dim = sizes[1];
69+
this->y_dim = sizes[2];
70+
this->x_dim = sizes[3];
71+
72+
// Set the thread block and grid dimensions using std::tuple
73+
this->block_dim.x = 8;
74+
this->block_dim.y = 8;
75+
this->block_dim.z = 8;
76+
77+
this->grid_dim.x = (this->x_dim + this->block_dim.x - 1) / this->block_dim.x;
78+
this->grid_dim.y = (this->y_dim + this->block_dim.y - 1) / this->block_dim.y;
79+
this->grid_dim.z = (this->z_dim + this->block_dim.z - 1) / this->block_dim.z;
80+
81+
// Check if z_dim is 1 or only 2D is true and return an error if it is
82+
if (this->z_dim == 1 || this->only_2D)
83+
{
84+
error(" requires a 3D image and only works for a 3x3x3 neighbourhood");
85+
return Succeeded::no;
86+
}
87+
88+
// {
89+
// if (this->d_kappa_data)
90+
// cudaFree(this->d_kappa_data);
91+
// auto kappa_ptr = this->get_kappa_sptr();
92+
// const bool do_kappa = !is_null_ptr(kappa_ptr);
6593

6694

6795
// Initialise projected_data_sptr from this->_proj_data_info_sptr
@@ -78,20 +106,76 @@ ForwardProjectorByBinSPECTGPU::set_up(const shared_ptr<const ProjDataInfo>& proj
78106

79107
void
80108
ForwardProjectorByBinSPECTGPU::actual_forward_project(
81-
RelatedViewgrams<float>&, const DiscretisedDensity<3, float>&, const int, const int, const int, const int)
109+
RelatedViewgrams<float>& stir_sino,
110+
const DiscretisedDensity<3, float>& stir_image,
111+
const int min_ax,
112+
const int max_ax,
113+
const int min_tg,
114+
const int max_tg)
82115
{
83-
throw std::runtime_error("Need to use set_input() if wanting to use ForwardProjectorByBinSPECTGPU.");
116+
//for all views in relateViewgram call the kernels
117+
dim3 cuda_block_dim(this->block_dim.x, this->block_dim.y, this->block_dim.z);
118+
dim3 cuda_grid_dim(this->grid_dim.x, this->grid_dim.y, this->grid_dim.z);
119+
viewgrams = _projected_data_sptr->get_related_viewgrams(viewgrams.get_basic_view_segment_num(), _symmetries_sptr);
120+
121+
for (auto view=0; view<viewgram.get_num_viewgrams(),view++)
122+
{
123+
float* dev_image;
124+
cudaMalloc(&dev_image, stir_image.size_all() * sizeof(float));
125+
array_to_device(dev_image, stir_image);
126+
127+
float* out_im;// need to copy on device?
128+
BasicCoordinate<3, int> min_ind, max_ind;
129+
130+
stir_image.get_regular_range(min_ind, max_ind);
131+
132+
const int min_z = min_ind[1];
133+
const int max_z = max_ind[1];
134+
135+
const int min_y = min_ind[2];
136+
const int max_y = max_ind[2];
137+
138+
const int min_x = min_ind[3];
139+
const int max_x = max_ind[3];
140+
141+
int3 dim(max_x - min_x + 1, max_y - min_y + 1, max_z - min_z + 1 );
142+
143+
float angle_rad; //todo
144+
float3 spacing(,,);
145+
float3 origin(,,);
146+
147+
rotateKernel_pull<<<cuda_grid_dim, cuda_block_dim>>>(dev_image,
148+
out_im,
149+
dim);
150+
151+
forwardKernel<<<cuda_grid_dim, cuda_block_dim>>>(out_im,
152+
viewgrams[view],
153+
dim);
154+
155+
}
156+
// cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
157+
// array_to_device(this->cuda_image, *stir_image_sptr);
158+
}
159+
160+
84161
}
85162

86163
void
87164
ForwardProjectorByBinSPECTGPU::actual_forward_project(
88165
RelatedViewgrams<float>& viewgrams, const int, const int, const int, const int)
89166
{
90-
// if (min_axial_pos_num != _proj_data_info_sptr->get_min_axial_pos_num() ||
167+
if (min_axial_pos_num != _proj_data_info_sptr->get_min_axial_pos_num() ||
91168
// ... )
92169
// error();
170+
//for all views in relateViewgram call the kernels
93171

94172
viewgrams = _projected_data_sptr->get_related_viewgrams(viewgrams.get_basic_view_segment_num(), _symmetries_sptr);
173+
for (auto view=0; view<viewgram.get_num_viewgrams(),view++)
174+
{
175+
cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
176+
}
177+
// cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
178+
// array_to_device(this->cuda_image, *stir_image_sptr);
95179
}
96180

97181
void

src/recon_buildblock/SPECTGPU_projector/SPECTGPUHelper.cxx

Lines changed: 40 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -31,91 +31,57 @@
3131
#include "stir/IO/stir_ecat_common.h"
3232
#include "stir/error.h"
3333
#include "stir/format.h"
34+
#include "stir/cuda_utilities.h"
3435
// Non-STIR includes
3536
#include <fstream>
3637
#include <math.h>
3738
#include "driver_types.h"
3839
// SPECTGPU includes
39-
#include "def.h"
40-
#include "auxmath.h"
41-
#include "prjb.h"
42-
#include "prjf.h"
43-
#include "recon.h"
44-
#include "lmproc.h"
45-
#include "scanner_0.h"
46-
#include "rnd.h"
47-
#include "norm.h"
4840

4941
START_NAMESPACE_STIR
5042

5143
SPECTGPUHelper::~SPECTGPUHelper()
5244
{}
5345

54-
// static void
55-
// delete_axialLUT(axialLUT* axlut_ptr)
56-
// {
57-
// if (!axlut_ptr)
58-
// return;
59-
// delete[] axlut_ptr->li2rno;
60-
// delete[] axlut_ptr->li2sn;
61-
// delete[] axlut_ptr->li2nos;
62-
// delete[] axlut_ptr->sn1_rno;
63-
// delete[] axlut_ptr->sn1_sn11;
64-
// delete[] axlut_ptr->sn1_ssrb;
65-
// delete[] axlut_ptr->sn1_sn11no;
66-
// }
67-
68-
static shared_ptr<Cnst>
69-
get_cnst(const Scanner& scanner, const bool cuda_verbose, const char cuda_device)
70-
{
71-
shared_ptr<Cnst> cnt_sptr = MAKE_SHARED<Cnst>();
7246

73-
cnt_sptr->DEVID = cuda_device; // device (GPU) ID. allows choosing the device on which to perform calculations
74-
cnt_sptr->VERBOSE = cuda_verbose;
47+
//static shared_ptr<Cnst>
48+
//get_cnst(const Scanner& scanner, const bool cuda_verbose, const char cuda_device)
49+
//{
50+
// shared_ptr<Cnst> cnt_sptr = MAKE_SHARED<Cnst>();
51+
52+
// cnt_sptr->DEVID = cuda_device; // device (GPU) ID. allows choosing the device on which to perform calculations
53+
// cnt_sptr->VERBOSE = cuda_verbose;
7554

76-
cnt_sptr->A = NSANGLES; // sino angles
77-
cnt_sptr->W = NSBINS; // sino bins for any angular index
78-
cnt_sptr->aw = AW; // sino bins (active only)
55+
// cnt_sptr->A = NSANGLES; // sino angles
56+
// cnt_sptr->W = NSBINS; // sino bins for any angular index
57+
// cnt_sptr->aw = AW; // sino bins (active only)
7958

80-
cnt_sptr->NCRS = nCRS; // number of crystals
81-
cnt_sptr->NRNG = NRINGS; // number of axial positions
82-
cnt_sptr->D = -1; // number of linear indexes along Michelogram diagonals /*unknown*/
83-
cnt_sptr->Bt = -1; // number of buckets transaxially /*unknown*/
59+
// cnt_sptr->NCRS = nCRS; // number of crystals
60+
// cnt_sptr->NRNG = NRINGS; // number of axial positions
61+
// cnt_sptr->D = -1; // number of linear indexes along Michelogram diagonals /*unknown*/
62+
// cnt_sptr->Bt = -1; // number of buckets transaxially /*unknown*/
8463

85-
cnt_sptr->B = NBUCKTS; // number of buckets (total)
86-
cnt_sptr->Cbt = 32552; // number of crystals in bucket transaxially /*unknown*/
87-
cnt_sptr->Cba = 3; // number of crystals in bucket axially /*unknown*/
64+
// cnt_sptr->B = NBUCKTS; // number of buckets (total)
65+
// cnt_sptr->Cbt = 32552; // number of crystals in bucket transaxially /*unknown*/
66+
// cnt_sptr->Cba = 3; // number of crystals in bucket axially /*unknown*/
8867

89-
cnt_sptr->NSN1 = NSINOS; // number of sinos
90-
cnt_sptr->NSN64 = NRINGS * NRINGS; // with no MRD limit
91-
cnt_sptr->NSEG0 = SEG0;
68+
// cnt_sptr->NSN1 = NSINOS; // number of sinos
69+
// cnt_sptr->NSN64 = NRINGS * NRINGS; // with no MRD limit
70+
// cnt_sptr->NSEG0 = SEG0;
9271

93-
cnt_sptr->RNG_STRT = 0;
94-
cnt_sptr->RNG_END = NRINGS;
72+
// cnt_sptr->RNG_STRT = 0;
73+
// cnt_sptr->RNG_END = NRINGS;
9574

96-
cnt_sptr->ALPHA = aLPHA; // angle subtended by a crystal
97-
float R = 32.8f; // ring radius
98-
cnt_sptr->RE = R + 0.67f; // effective ring radius accounting for the depth of interaction
99-
cnt_sptr->AXR = SZ_RING; // axial crystal dim
75+
// cnt_sptr->ALPHA = aLPHA; // angle subtended by a crystal
76+
// float R = 32.8f; // ring radius
77+
// cnt_sptr->RE = R + 0.67f; // effective ring radius accounting for the depth of interaction
78+
// cnt_sptr->AXR = SZ_RING; // axial crystal dim
10079

101-
float CLGHT = 29979245800.f; // speed of light [cm/s]
102-
return cnt_sptr;
103-
}
80+
// float CLGHT = 29979245800.f; // speed of light [cm/s]
81+
// return cnt_sptr;
82+
//}
10483

105-
static inline unsigned
106-
to_1d_idx(const unsigned nrow, const unsigned ncol, const unsigned row, const unsigned col)
107-
{
108-
return col + ncol * row;
109-
}
11084

111-
template <class dataType>
112-
dataType*
113-
create_heap_array(const unsigned numel, const dataType val = dataType(0))
114-
{
115-
dataType* array = new dataType[numel];
116-
std::fill(array, array + numel, val);
117-
return array;
118-
}
11985

12086
void
12187
SPECTGPUHelper::set_up()
@@ -124,14 +90,14 @@ SPECTGPUHelper::set_up()
12490
throw std::runtime_error("SPECTGPUHelper::set_up() "
12591
"emission or transmission mode (att) not set.");
12692

127-
// Get consts
128-
_cnt_sptr = get_cnst(_scanner_type, _verbose, _devid);
93+
// // Get consts
94+
// _cnt_sptr = get_cnst(_scanner_type, _verbose, _devid);
12995

13096

131-
// isub
132-
_isub = std::vector<int>(unsigned(AW));
133-
for (unsigned i = 0; i < unsigned(AW); i++)
134-
_isub[i] = int(i);
97+
// // isub
98+
// _isub = std::vector<int>(unsigned(AW));
99+
// for (unsigned i = 0; i < unsigned(AW); i++)
100+
// _isub[i] = int(i);
135101

136102
_already_set_up = true;
137103
}
@@ -196,53 +162,6 @@ SPECTGPUHelper::convert_SPECTGPU_proj_3d_to_1d_idx(const unsigned ang, const uns
196162
}
197163

198164
void
199-
SPECTGPUHelper::permute(std::vector<float>& output_array,
200-
const std::vector<float>& orig_array,
201-
const unsigned output_dims[3],
202-
const unsigned permute_order[3]) const
203-
{
204-
#ifndef NDEBUG
205-
// Check that in the permute order, each number is between 0 and 2 (can't be <0 because it's unsigned)
206-
for (unsigned i = 0; i < 3; ++i)
207-
if (permute_order[i] > 2)
208-
throw std::runtime_error("Permute order values should be between 0 and 2.");
209-
// Check that each number is unique
210-
for (unsigned i = 0; i < 3; ++i)
211-
for (unsigned j = i + 1; j < 3; ++j)
212-
if (permute_order[i] == permute_order[j])
213-
throw std::runtime_error("Permute order values should be unique.");
214-
// Check that size of output_dims==arr.size()
215-
assert(orig_array.size() == output_dims[0] * output_dims[1] * output_dims[2]);
216-
// Check that output array is same size as input array
217-
assert(orig_array.size() == output_array.size());
218-
#endif
219-
220-
// Calculate old dimensions
221-
unsigned old_dims[3];
222-
for (unsigned i = 0; i < 3; ++i)
223-
old_dims[permute_order[i]] = output_dims[i];
224-
225-
// Loop over all elements
226-
for (unsigned old_1d_idx = 0; old_1d_idx < orig_array.size(); ++old_1d_idx)
227-
{
228-
229-
// From the 1d index, generate the old 3d index
230-
unsigned old_3d_idx[3]
231-
= { old_1d_idx / (old_dims[2] * old_dims[1]), (old_1d_idx / old_dims[2]) % old_dims[1], old_1d_idx % old_dims[2] };
232-
233-
// Get the corresponding new 3d index
234-
unsigned new_3d_idx[3];
235-
for (unsigned i = 0; i < 3; ++i)
236-
new_3d_idx[i] = old_3d_idx[permute_order[i]];
237-
238-
// Get the new 1d index from the new 3d index
239-
const unsigned new_1d_idx
240-
= new_3d_idx[0] * output_dims[2] * output_dims[1] + new_3d_idx[1] * output_dims[2] + new_3d_idx[2];
241-
242-
// Fill the data
243-
output_array[new_1d_idx] = orig_array[old_1d_idx];
244-
}
245-
}
246165

247166
void
248167
SPECTGPUHelper::back_project(std::vector<float>& image, const std::vector<float>& sino_no_gaps) const
@@ -262,11 +181,13 @@ SPECTGPUHelper::back_project(std::vector<float>& image, const std::vector<float>
262181
}
263182

264183
void
265-
SPECTGPUHelper::forward_project(std::vector<float>& sino, const std::vector<float>& image) const
184+
SPECTGPUHelper::forward_project(Array<3, elemT>& sino, const Array<3, elemT>& image) const
266185
{
267186
check_set_up();
268187
assert(!sino.empty());
269-
188+
// prjdatainmemory
189+
cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
190+
array_to_device(this->cuda_image, *stir_image_sptr);
270191
// Permute the data (as this is done on the SPECTGPU python side before forward projection
271192
// unsigned output_dims[3] = { 320, 320, 127 };
272193
// unsigned permute_order[3] = { 1, 2, 0 };

0 commit comments

Comments
 (0)