Joining all contributions; created an actual_forward_project() that calls the kernels. We need to double-check that the kernel can write into the sino, but we will probably need to do device_to_host(stir_sino, cuda_array_created_by_forward).

danieldeidda · danieldeidda · commit aec9adf5a294 · 2026-03-06T16:34:56.000Z
diff --git a/src/include/stir/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.h b/src/include/stir/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.h
@@ -82,6 +82,13 @@ class BackProjectorByBinSPECTGPU : public RegisteredParsingObject<BackProjectorB
                                    const int min_tangential_pos_num,
                                    const int max_tangential_pos_num);
 
+  virtual void actual_back_project(DiscretisedDensity<3, float>& stir_image,
+                                   const RelatedViewgrams<float>&,
+                                   const int min_axial_pos_num,
+                                   const int max_axial_pos_num,
+                                   const int min_tangential_pos_num,
+                                   const int max_tangential_pos_num);
+
 private:
   shared_ptr<DataSymmetriesForViewSegmentNumbers> _symmetries_sptr;
   SPECTGPUHelper _helper;
diff --git a/src/include/stir/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.h b/src/include/stir/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.h
@@ -94,8 +94,19 @@ class ForwardProjectorByBinSPECTGPU : public RegisteredParsingObject<ForwardProj
                                       const int min_tangential_pos_num,
                                       const int max_tangential_pos_num);
 
+protected:
+  struct cppdim3
+  {
+    int x;
+    int y;
+    int z;
+  };
+
+  int z_dim, y_dim, x_dim;
+  cppdim3 block_dim;
+  cppdim3 grid_dim;
+
 private:
-  shared_ptr<DataSymmetriesForViewSegmentNumbers> _symmetries_sptr;
   shared_ptr<ProjDataInMemory> _projected_data_sptr;
   SPECTGPUHelper _helper;
   int _cuda_device;
diff --git a/src/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.cxx b/src/recon_buildblock/SPECTGPU_projector/BackProjectorByBinSPECTGPU.cxx
@@ -120,11 +120,11 @@ BackProjectorByBinSPECTGPU::start_accumulating_in_new_target()
 }
 
 void
-BackProjectorByBinSPECTGPU::actual_back_project(
+BackProjectorByBinSPECTGPU::actual_back_project(DiscretisedDensity<3, float> &stir_image,
     const RelatedViewgrams<float>& related_viewgrams, const int, const int, const int, const int)
 {
-  for (stir::RelatedViewgrams<float>::const_iterator iter = related_viewgrams.begin(); iter != related_viewgrams.end(); ++iter)
-    _helper.convert_viewgram_stir_to_SPECTGPU(_np_sino_w_gaps, *iter);
+
+//    call the kernels for backward
 }
 
 END_NAMESPACE_STIR
diff --git a/src/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.cxx b/src/recon_buildblock/SPECTGPU_projector/ForwardProjectorByBinSPECTGPU.cxx
@@ -61,7 +61,35 @@ ForwardProjectorByBinSPECTGPU::set_up(const shared_ptr<const ProjDataInfo>& proj
 {
   ForwardProjectorByBin::set_up(proj_data_info_sptr, density_info_sptr);
   check(*proj_data_info_sptr, *_density_sptr);
-  _symmetries_sptr.reset(new TrivialDataSymmetriesForBins(proj_data_info_sptr));
+
+  auto& target_cast = dynamic_cast<const VoxelsOnCartesianGrid<elemT>&>(*target_sptr);
+  auto sizes = target_cast.get_lengths();
+
+  this->z_dim = sizes[1];
+  this->y_dim = sizes[2];
+  this->x_dim = sizes[3];
+
+  // Set the thread block and grid dimensions using std::tuple
+  this->block_dim.x = 8;
+  this->block_dim.y = 8;
+  this->block_dim.z = 8;
+
+  this->grid_dim.x = (this->x_dim + this->block_dim.x - 1) / this->block_dim.x;
+  this->grid_dim.y = (this->y_dim + this->block_dim.y - 1) / this->block_dim.y;
+  this->grid_dim.z = (this->z_dim + this->block_dim.z - 1) / this->block_dim.z;
+
+  //  Check if z_dim is 1 or only 2D is true and return an error if it is
+  if (this->z_dim == 1 || this->only_2D)
+    {
+      error(" requires a 3D image and only works for a 3x3x3 neighbourhood");
+      return Succeeded::no;
+    }
+
+//  {
+//    if (this->d_kappa_data)
+//      cudaFree(this->d_kappa_data);
+//    auto kappa_ptr = this->get_kappa_sptr();
+//    const bool do_kappa = !is_null_ptr(kappa_ptr);
 
   
   // Initialise projected_data_sptr from this->_proj_data_info_sptr
@@ -78,20 +106,76 @@ ForwardProjectorByBinSPECTGPU::set_up(const shared_ptr<const ProjDataInfo>& proj
 
 void
 ForwardProjectorByBinSPECTGPU::actual_forward_project(
-    RelatedViewgrams<float>&, const DiscretisedDensity<3, float>&, const int, const int, const int, const int)
+    RelatedViewgrams<float>& stir_sino,
+        const DiscretisedDensity<3, float>& stir_image,
+        const int min_ax,
+        const int max_ax,
+        const int min_tg,
+        const int max_tg)
 {
-  throw std::runtime_error("Need to use set_input() if wanting to use ForwardProjectorByBinSPECTGPU.");
+    //for all views in relateViewgram call the kernels
+    dim3 cuda_block_dim(this->block_dim.x, this->block_dim.y, this->block_dim.z);
+    dim3 cuda_grid_dim(this->grid_dim.x, this->grid_dim.y, this->grid_dim.z);
+    viewgrams = _projected_data_sptr->get_related_viewgrams(viewgrams.get_basic_view_segment_num(), _symmetries_sptr);
+
+    for (auto view=0; view<viewgram.get_num_viewgrams(),view++)
+    {
+        float* dev_image;
+        cudaMalloc(&dev_image, stir_image.size_all() * sizeof(float));
+        array_to_device(dev_image, stir_image);
+
+        float* out_im;// need to copy on device?
+        BasicCoordinate<3, int> min_ind, max_ind;
+
+        stir_image.get_regular_range(min_ind, max_ind);
+
+        const int min_z = min_ind[1];
+        const int max_z = max_ind[1];
+
+        const int min_y = min_ind[2];
+        const int max_y = max_ind[2];
+
+        const int min_x = min_ind[3];
+        const int max_x = max_ind[3];
+
+        int3 dim(max_x - min_x + 1, max_y - min_y + 1, max_z - min_z + 1 );
+
+        float angle_rad; //todo
+        float3 spacing(,,);
+        float3 origin(,,);
+
+        rotateKernel_pull<<<cuda_grid_dim, cuda_block_dim>>>(dev_image,
+                                       out_im,
+                                       dim);
+
+        forwardKernel<<<cuda_grid_dim, cuda_block_dim>>>(out_im,
+                                     viewgrams[view],
+                                     dim);
+
+      }
+      //  cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
+    //  array_to_device(this->cuda_image, *stir_image_sptr);
+    }
+
+
 }
 
 void
 ForwardProjectorByBinSPECTGPU::actual_forward_project(
     RelatedViewgrams<float>& viewgrams, const int, const int, const int, const int)
 {
-  //    if (min_axial_pos_num != _proj_data_info_sptr->get_min_axial_pos_num() ||
+      if (min_axial_pos_num != _proj_data_info_sptr->get_min_axial_pos_num() ||
   //         ... )
   //       error();
+//for all views in relateViewgram call the kernels
 
   viewgrams = _projected_data_sptr->get_related_viewgrams(viewgrams.get_basic_view_segment_num(), _symmetries_sptr);
+  for (auto view=0; view<viewgram.get_num_viewgrams(),view++)
+  {
+      cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
+  }
+  //  cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
+//  array_to_device(this->cuda_image, *stir_image_sptr);
 }
 
 void
diff --git a/src/recon_buildblock/SPECTGPU_projector/SPECTGPUHelper.cxx b/src/recon_buildblock/SPECTGPU_projector/SPECTGPUHelper.cxx
@@ -31,91 +31,57 @@
 #include "stir/IO/stir_ecat_common.h"
 #include "stir/error.h"
 #include "stir/format.h"
+#include "stir/cuda_utilities.h"
 // Non-STIR includes
 #include <fstream>
 #include <math.h>
 #include "driver_types.h"
 // SPECTGPU includes
-#include "def.h"
-#include "auxmath.h"
-#include "prjb.h"
-#include "prjf.h"
-#include "recon.h"
-#include "lmproc.h"
-#include "scanner_0.h"
-#include "rnd.h"
-#include "norm.h"
 
 START_NAMESPACE_STIR
 
 SPECTGPUHelper::~SPECTGPUHelper()
 {}
 
-// static void
-// delete_axialLUT(axialLUT* axlut_ptr)
-// {
-//   if (!axlut_ptr)
-//     return;
-//   delete[] axlut_ptr->li2rno;
-//   delete[] axlut_ptr->li2sn;
-//   delete[] axlut_ptr->li2nos;
-//   delete[] axlut_ptr->sn1_rno;
-//   delete[] axlut_ptr->sn1_sn11;
-//   delete[] axlut_ptr->sn1_ssrb;
-//   delete[] axlut_ptr->sn1_sn11no;
-// }
-
-static shared_ptr<Cnst>
-get_cnst(const Scanner& scanner, const bool cuda_verbose, const char cuda_device)
-{
-  shared_ptr<Cnst> cnt_sptr = MAKE_SHARED<Cnst>();
 
-  cnt_sptr->DEVID = cuda_device; // device (GPU) ID.  allows choosing the device on which to perform calculations
-  cnt_sptr->VERBOSE = cuda_verbose;
+//static shared_ptr<Cnst>
+//get_cnst(const Scanner& scanner, const bool cuda_verbose, const char cuda_device)
+//{
+//  shared_ptr<Cnst> cnt_sptr = MAKE_SHARED<Cnst>();
+
+//  cnt_sptr->DEVID = cuda_device; // device (GPU) ID.  allows choosing the device on which to perform calculations
+//  cnt_sptr->VERBOSE = cuda_verbose;
      
-  cnt_sptr->A = NSANGLES; // sino angles
-  cnt_sptr->W = NSBINS;   // sino bins for any angular index
-  cnt_sptr->aw = AW;      // sino bins (active only)
+//  cnt_sptr->A = NSANGLES; // sino angles
+//  cnt_sptr->W = NSBINS;   // sino bins for any angular index
+//  cnt_sptr->aw = AW;      // sino bins (active only)
 
-  cnt_sptr->NCRS = nCRS;   // number of crystals
-  cnt_sptr->NRNG = NRINGS; // number of axial positions
-  cnt_sptr->D = -1;        // number of linear indexes along Michelogram diagonals                         /*unknown*/
-  cnt_sptr->Bt = -1;       // number of buckets transaxially                                               /*unknown*/
+//  cnt_sptr->NCRS = nCRS;   // number of crystals
+//  cnt_sptr->NRNG = NRINGS; // number of axial positions
+//  cnt_sptr->D = -1;        // number of linear indexes along Michelogram diagonals                         /*unknown*/
+//  cnt_sptr->Bt = -1;       // number of buckets transaxially                                               /*unknown*/
 
-  cnt_sptr->B = NBUCKTS; // number of buckets (total)
-  cnt_sptr->Cbt = 32552; // number of crystals in bucket transaxially                                /*unknown*/
-  cnt_sptr->Cba = 3;     // number of crystals in bucket axially                                         /*unknown*/
+//  cnt_sptr->B = NBUCKTS; // number of buckets (total)
+//  cnt_sptr->Cbt = 32552; // number of crystals in bucket transaxially                                /*unknown*/
+//  cnt_sptr->Cba = 3;     // number of crystals in bucket axially                                         /*unknown*/
 
-  cnt_sptr->NSN1 = NSINOS;           // number of sinos 
-  cnt_sptr->NSN64 = NRINGS * NRINGS; // with no MRD limit
-  cnt_sptr->NSEG0 = SEG0;
+//  cnt_sptr->NSN1 = NSINOS;           // number of sinos
+//  cnt_sptr->NSN64 = NRINGS * NRINGS; // with no MRD limit
+//  cnt_sptr->NSEG0 = SEG0;
 
-  cnt_sptr->RNG_STRT = 0;
-  cnt_sptr->RNG_END = NRINGS;
+//  cnt_sptr->RNG_STRT = 0;
+//  cnt_sptr->RNG_END = NRINGS;
 
-  cnt_sptr->ALPHA = aLPHA;  // angle subtended by a crystal
-  float R = 32.8f;          // ring radius
-  cnt_sptr->RE = R + 0.67f; // effective ring radius accounting for the depth of interaction
-  cnt_sptr->AXR = SZ_RING;  // axial crystal dim
+//  cnt_sptr->ALPHA = aLPHA;  // angle subtended by a crystal
+//  float R = 32.8f;          // ring radius
+//  cnt_sptr->RE = R + 0.67f; // effective ring radius accounting for the depth of interaction
+//  cnt_sptr->AXR = SZ_RING;  // axial crystal dim
 
-  float CLGHT = 29979245800.f;                   // speed of light [cm/s]
-  return cnt_sptr;
-}
+//  float CLGHT = 29979245800.f;                   // speed of light [cm/s]
+//  return cnt_sptr;
+//}
 
-static inline unsigned
-to_1d_idx(const unsigned nrow, const unsigned ncol, const unsigned row, const unsigned col)
-{
-  return col + ncol * row;
-}
 
-template <class dataType>
-dataType*
-create_heap_array(const unsigned numel, const dataType val = dataType(0))
-{
-  dataType* array = new dataType[numel];
-  std::fill(array, array + numel, val);
-  return array;
-}
 
 void
 SPECTGPUHelper::set_up()
@@ -124,14 +90,14 @@ SPECTGPUHelper::set_up()
     throw std::runtime_error("SPECTGPUHelper::set_up() "
                              "emission or transmission mode (att) not set.");
 
-  // Get consts
-  _cnt_sptr = get_cnst(_scanner_type, _verbose, _devid);
+//  // Get consts
+//  _cnt_sptr = get_cnst(_scanner_type, _verbose, _devid);
 
 
-  // isub
-  _isub = std::vector<int>(unsigned(AW));
-  for (unsigned i = 0; i < unsigned(AW); i++)
-    _isub[i] = int(i);
+//  // isub
+//  _isub = std::vector<int>(unsigned(AW));
+//  for (unsigned i = 0; i < unsigned(AW); i++)
+//    _isub[i] = int(i);
 
   _already_set_up = true;
 }
@@ -196,53 +162,6 @@ SPECTGPUHelper::convert_SPECTGPU_proj_3d_to_1d_idx(const unsigned ang, const uns
 }
 
 void
-SPECTGPUHelper::permute(std::vector<float>& output_array,
-                        const std::vector<float>& orig_array,
-                        const unsigned output_dims[3],
-                        const unsigned permute_order[3]) const
-{
-#ifndef NDEBUG
-  // Check that in the permute order, each number is between 0 and 2 (can't be <0 because it's unsigned)
-  for (unsigned i = 0; i < 3; ++i)
-    if (permute_order[i] > 2)
-      throw std::runtime_error("Permute order values should be between 0 and 2.");
-  // Check that each number is unique
-  for (unsigned i = 0; i < 3; ++i)
-    for (unsigned j = i + 1; j < 3; ++j)
-      if (permute_order[i] == permute_order[j])
-        throw std::runtime_error("Permute order values should be unique.");
-  // Check that size of output_dims==arr.size()
-  assert(orig_array.size() == output_dims[0] * output_dims[1] * output_dims[2]);
-  // Check that output array is same size as input array
-  assert(orig_array.size() == output_array.size());
-#endif
-
-  // Calculate old dimensions
-  unsigned old_dims[3];
-  for (unsigned i = 0; i < 3; ++i)
-    old_dims[permute_order[i]] = output_dims[i];
-
-  // Loop over all elements
-  for (unsigned old_1d_idx = 0; old_1d_idx < orig_array.size(); ++old_1d_idx)
-    {
-
-      // From the 1d index, generate the old 3d index
-      unsigned old_3d_idx[3]
-          = { old_1d_idx / (old_dims[2] * old_dims[1]), (old_1d_idx / old_dims[2]) % old_dims[1], old_1d_idx % old_dims[2] };
-
-      // Get the corresponding new 3d index
-      unsigned new_3d_idx[3];
-      for (unsigned i = 0; i < 3; ++i)
-        new_3d_idx[i] = old_3d_idx[permute_order[i]];
-
-      // Get the new 1d index from the new 3d index
-      const unsigned new_1d_idx
-          = new_3d_idx[0] * output_dims[2] * output_dims[1] + new_3d_idx[1] * output_dims[2] + new_3d_idx[2];
-
-      // Fill the data
-      output_array[new_1d_idx] = orig_array[old_1d_idx];
-    }
-}
 
 void
 SPECTGPUHelper::back_project(std::vector<float>& image, const std::vector<float>& sino_no_gaps) const
@@ -262,11 +181,13 @@ SPECTGPUHelper::back_project(std::vector<float>& image, const std::vector<float>
 }
 
 void
-SPECTGPUHelper::forward_project(std::vector<float>& sino, const std::vector<float>& image) const
+SPECTGPUHelper::forward_project(Array<3, elemT>& sino, const Array<3, elemT>& image) const
 {
   check_set_up();
   assert(!sino.empty());
-
+//  prjdatainmemory
+  cudaMalloc(&this->cuda_image, stir_image_sptr->size_all() * sizeof(elemT));
+  array_to_device(this->cuda_image, *stir_image_sptr);
   // Permute the data (as this is done on the SPECTGPU python side before forward projection
   // unsigned output_dims[3] = { 320, 320, 127 };
   // unsigned permute_order[3] = { 1, 2, 0 };
diff --git a/src/recon_buildblock/SPECTGPU_projector/SPECTGPUProjection.cu b/src/recon_buildblock/SPECTGPU_projector/SPECTGPUProjection.cu
diff --git a/src/recon_buildblock/SPECTGPU_projector/SPECTGPURotateAndGaussianInterpolate.cu b/src/recon_buildblock/SPECTGPU_projector/SPECTGPURotateAndGaussianInterpolate.cu

Original file line number	Diff line number	Diff line change
`@@ -120,11 +120,11 @@ BackProjectorByBinSPECTGPU::start_accumulating_in_new_target()`
`120`	`120`	`}`
`121`	`121`
`122`	`122`	`void`
`123`		`-BackProjectorByBinSPECTGPU::actual_back_project(`
	`123`	`+BackProjectorByBinSPECTGPU::actual_back_project(DiscretisedDensity<3, float> &stir_image,`
`124`	`124`	`const RelatedViewgrams<float>& related_viewgrams, const int, const int, const int, const int)`
`125`	`125`	`{`
`126`		`- for (stir::RelatedViewgrams<float>::const_iterator iter = related_viewgrams.begin(); iter != related_viewgrams.end(); ++iter)`
`127`		`- _helper.convert_viewgram_stir_to_SPECTGPU(_np_sino_w_gaps, *iter);`
	`126`	`+`
	`127`	`+// call the kernels for backward`
`128`	`128`	`}`
`129`	`129`
`130`	`130`	`END_NAMESPACE_STIR`