@@ -66,7 +66,7 @@ def find_center_vo(
6666 step : float = 0.25 ,
6767 ratio : float = 0.5 ,
6868 drop : int = 20 ,
69- ) -> float :
69+ ) -> np . float32 :
7070 """
7171 Find the rotation axis location (aka the centre of rotation) using Nghia Vo's method. See the paper
7272 :cite:`vo2014reliable`.
@@ -99,7 +99,7 @@ def find_center_vo(
9999
100100 Returns
101101 -------
102- float
102+ float32
103103 Rotation axis location with a subpixel precision.
104104 """
105105 # if 2d sinogram is given it is extended into a 3D array along the vertical dimension
@@ -148,12 +148,6 @@ def find_center_vo(
148148 if dsp_angle > 1 or dsp_detX > 1 :
149149 _sino_cs = _downsample (_sino_cs , dsp_angle , dsp_detX )
150150
151- # NOTE: the gpu implementation of _downsample kernel bellow is erroneuos (different results with each run), needs to be re-written
152- # if dsp_angle > 1:
153- # _sino_cs = _downsample_kernel(_sino_cs, level=dsp_angle, axis=0)
154- # if dsp_detX > 1:
155- # _sino_cs = _downsample_kernel(_sino_cs, level=dsp_detX, axis=1)
156-
157151 # NOTE: this is correct implementation that avoids running any CUDA kernels. The performance is suboptimal
158152 init_cen = _search_coarse (_sino_cs , start_cor , stop_cor , ratio , drop )
159153
@@ -164,7 +158,7 @@ def find_center_vo(
164158 )
165159 cen_np = np .float32 (cp .asnumpy (fine_cen ))
166160 if cen_np == 0.0 :
167- return cor_initialisation_value
161+ return np . float32 ( cor_initialisation_value )
168162 else :
169163 return cen_np
170164
@@ -174,22 +168,7 @@ def _search_coarse(sino, smin, smax, ratio, drop):
174168 flip_sino = cp .ascontiguousarray (cp .fliplr (sino ))
175169 comp_sino = cp .ascontiguousarray (cp .flipud (sino ))
176170
177- # # NOTE: gpu code here, half a mask created to avoid sinofram concatenitation and save memory?
178- # mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
179- # # NOTE: old GPU code for the sizes with half data
180- # cen_fliplr = (ncol - 1.0) / 2.0
181- # smin_clip_val = max(min(smin + cen_fliplr, ncol - 1), 0)
182- # smin = smin_clip_val - cen_fliplr
183- # smax_clip_val = max(min(smax + cen_fliplr, ncol - 1), 0)
184- # smax = smax_clip_val - cen_fliplr
185- # start_cor = ncol // 2 + smin
186- # stop_cor = ncol // 2 + smax
187- # list_cor = cp.arange(start_cor, stop_cor + 0.5, 0.5, dtype=cp.float32)
188- # list_shift = 2.0 * (list_cor - cen_fliplr)
189- # list_metric = cp.empty(list_shift.shape, dtype=cp.float32)
190-
191171 mask = _create_mask (2 * nrow , ncol , 0.5 * ratio * ncol , drop )
192- # mask = cp.asarray(mask, dtype=cp.float32)
193172 cen_fliplr = (ncol - 1.0 ) / 2.0
194173 start_cor , stop_cor = np .sort ((smin , smax ))
195174 start_cor = np .int16 (np .clip (start_cor , 0 , ncol - 1 ))
@@ -198,10 +177,6 @@ def _search_coarse(sino, smin, smax, ratio, drop):
198177 list_shift = 2.0 * (list_cor - cen_fliplr )
199178 list_metric = cp .empty (list_shift .shape , dtype = cp .float32 )
200179
201- # NOTE: this gives a different result to the CPU code, also works with a half data and a half mask
202- # _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, list_metric)
203-
204- # This essentially repeats the CPU code... probably not optimal but correct
205180 sino_sino = cp .vstack ((sino , flip_sino ))
206181 for i , shift in enumerate (list_shift ):
207182 _sino = sino_sino [nrow :]
@@ -258,6 +233,7 @@ def _create_mask_numpy(nrow, ncol, radius, drop):
258233 mask [:, cen_col - 1 : cen_col + 2 ] = 0.0
259234 return mask
260235
236+
261237def _create_mask_half (nrow , ncol , radius , drop ):
262238 du = 1.0 / ncol
263239 dv = (nrow - 1.0 ) / (nrow * 2.0 * np .pi )
@@ -288,6 +264,7 @@ def _create_mask_half(nrow, ncol, radius, drop):
288264 kernel (grid_dims , block_dims , params )
289265 return mask
290266
267+
291268def _create_mask (nrow , ncol , radius , drop ):
292269 du = 1.0 / ncol
293270 dv = (nrow - 1.0 ) / (nrow * 2.0 * np .pi )
@@ -345,7 +322,8 @@ def _calculate_chunks(
345322 available_memory -= shift_size
346323 freq_domain_size = (
347324 # shift_size # it needs only half (RFFT), but complex64, so it's the same
348- shift_size * 2 # it needs full (FFT), with complex64, so it's double
325+ shift_size
326+ * 2 # it needs full (FFT), with complex64, so it's double
349327 )
350328 fft_plan_size = freq_domain_size
351329 size_per_shift = 2 * (fft_plan_size + freq_domain_size + shift_size )
@@ -450,6 +428,7 @@ def _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out):
450428 mat_freq [:size , :, :], mask , out = out [start_idx :stop_idx ], axis = (1 , 2 )
451429 )
452430
431+
453432def _downsample (image , dsp_fact0 , dsp_fact1 ):
454433 """Downsample an image by averaging.
455434
@@ -480,35 +459,6 @@ def _downsample(image, dsp_fact0, dsp_fact1):
480459 return image_dsp
481460
482461
483- def _downsample_kernel (sino , level , axis ):
484- assert sino .dtype == cp .float32 , "single precision floating point input required"
485- assert sino .flags ["C_CONTIGUOUS" ], "list_shift must be C-contiguous"
486-
487- dx , dz = sino .shape
488- # Determine the new size, dim, of the downsampled dimension
489- # dim_new_size = int(sino.shape[axis] / math.pow(2, level))
490- dim_new_size = int (sino .shape [axis ] / level )
491- shape = [dx , dz ]
492- shape [axis ] = dim_new_size
493- downsampled_data = cp .empty (shape , dtype = "float32" )
494-
495- block_x = 8
496- block_y = 8
497- block_dims = (block_x , block_y )
498- grid_x = (sino .shape [1 ] + block_x - 1 ) // block_x
499- grid_y = (sino .shape [0 ] + block_y - 1 ) // block_y
500- grid_dims = (grid_x , grid_y )
501- # 8x8 thread-block, which means 16 "lots" of columns to downsample per
502- # thread-block; 4 bytes per float, so allocate 16*6 = 64 bytes of shared
503- # memeory per thread-block
504- shared_mem_bytes = 64
505- params = (sino , dx , dz , level , downsampled_data )
506- module = load_cuda_module ("downsample_sino" )
507- kernel = module .get_function ("downsample_sino" )
508- kernel (grid_dims , block_dims , params , shared_mem = shared_mem_bytes )
509- return downsampled_data
510-
511-
512462##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
513463# # %%%%%%%%%%%%%%%%%%%%%%%%%find_center_360%%%%%%%%%%%%%%%%%%%%%%%%%
514464# --- Center of rotation (COR) estimation method ---#
@@ -858,7 +808,7 @@ def find_center_pc(
858808 Subpixel accuracy. Defaults to 0.5.
859809 rotc_guess : float, optional
860810 Initial guess value for the rotation center. Defaults to None.
861-
811+
862812 Returns
863813 ----------
864814 float
0 commit comments