WIP

neon60 · neon60 · commit 159af7d604b7 · 2025-01-11T16:46:30.000+01:00
diff --git a/docs/source/metrics_calculate.py b/docs/source/metrics_calculate.py
@@ -7,6 +7,7 @@
 from cupyx.scipy.fft import rfft2, fft2, fftshift
 import httomolibgpu
 from httomolibgpu.cuda_kernels import load_cuda_module
+from httomolibgpu.prep.normalize import normalize
 import httomolibgpu.recon.rotation
 
 import matplotlib.pyplot as plt
@@ -95,81 +96,48 @@ def _create_mask_new(nrow, ncol, radius, drop):
 # Load the sinogram data
 path_lib = os.path.dirname(httomolibgpu.__file__)
 in_file = os.path.abspath(
-    os.path.join(path_lib, "..", "tests/test_data/", "3600proj_sino.npz")
+    os.path.join(path_lib, "..", "tests/test_data/", "i12LFOV.npz")
 )
 l_infile = np.load(in_file)
-sinogram = l_infile["sinogram"]
-angles = l_infile["angles"]
-sinogram = cp.asarray(sinogram)
 
-sino_shape = sinogram.shape
+projdata = cp.asarray(l_infile["projdata"])
+flats = cp.asarray(l_infile["flats"])
+darks = cp.asarray(l_infile["darks"])
+del l_infile
 
-print("The shape of the sinogram stack is {}".format(sino_shape))
+data_normalised = normalize(projdata, flats, darks, minus_log=False)
+del flats, darks, projdata
 
-flip_sino = cp.ascontiguousarray(cp.fliplr(sinogram))
-comp_sino = cp.ascontiguousarray(cp.flipud(sinogram))
+spec = importlib.util.spec_from_file_location("rotation_new", os.path.join(os.path.dirname(os.path.abspath(__file__)), "rotation_new.py"))  # load rotation_new.py from this script's own directory instead of a machine-specific absolute path
+rotation_new = importlib.util.module_from_spec(spec)
+sys.modules["rotation_new"] = rotation_new  # register under its name before executing the module body
+spec.loader.exec_module(rotation_new)
 
-(nrow, ncol) = sinogram.shape
-ratio = 0.5
-drop  = 20
-smin = -100
-smax = 100
+# --- Running the centre of rotation algorithm  ---#
+mid_slice = data_normalised.shape[1] // 2
 
-mask = _create_mask_numpy(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
-
-mask = cp.asarray(mask, dtype=cp.float32)
-cen_fliplr = (ncol - 1.0) / 2.0
-start_cor, stop_cor = np.sort((smin, smax))
-start_cor = np.int16(np.clip(start_cor, 0, ncol - 1))
-stop_cor = np.int16(np.clip(stop_cor, 0, ncol - 1))
-list_cor = cp.arange(start_cor, stop_cor + 1.0, dtype=cp.float32)
-list_shift = 2.0 * (list_cor - cen_fliplr)
-list_metric = cp.empty(list_shift.shape, dtype=cp.float32)
-
-print(list_shift)
-
-sino_sino = cp.vstack((sinogram, flip_sino))
-for i, shift in enumerate(list_shift):
-    _sino = sino_sino[nrow:]
-    _sino[...] = cp.roll(flip_sino, int(shift), axis=1)
-    if shift >= 0:
-        _sino[:, :shift] = comp_sino[:, :shift]
-    else:
-        _sino[:, shift:] = comp_sino[:, shift:]
-    list_metric[i] = cp.mean(cp.abs(fftshift(fft2(sino_sino))) * mask)
-
-print(list_metric)
-
-spec = importlib.util.spec_from_file_location("rotation_new", "C:\Work\DiamondLightSource\httomolibgpu\docs\source\metrics_calculate.py")
-foo = importlib.util.module_from_spec(spec)
-sys.modules["rotation_new"] = foo
-spec.loader.exec_module(foo)
-foo.MyClass()
-
-import docs.source.rotation_new as rotation_new
-
-rotation_value = rotation.find_center_vo();
-new_rotation_value = rotation_new.find_center_vo();
+rotation_value = rotation.find_center_vo(data_normalised[:, mid_slice, :])  # both calls receive the same middle slice (axis 1) so the results are comparable
+new_rotation_value = rotation_new.find_center_vo(data_normalised[:, mid_slice, :])
 
 print(rotation_value)
 print(new_rotation_value)
 
 #subplot(r,c) provide the no. of rows and columns
-f, axarr = plt.subplots(2,2) 
+# f, axarr = plt.subplots(2,2) 
 
-mask_2 = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
+# mask_2 = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
 
 # use the created array to output your multiple images. In this case I have stacked 4 images vertically
-axarr[0, 0].imshow(mask.get())
-axarr[0, 0].set_title('Original mask')
-axarr[0, 1].imshow(mask_2.get())
-axarr[0, 1].set_title('GPU mask')
-axarr[1, 0].imshow(mask.get() - mask_2.get())
-axarr[1, 0].set_title('Difference of masks')
-axarr[1, 1].imshow(mask.get() - mask_2.get())
-axarr[1, 1].set_title('Difference of masks')
-
-plt.show()
+# axarr[0, 0].imshow(mask.get())
+# axarr[0, 0].set_title('Original mask')
+# axarr[0, 1].imshow(mask_2.get())
+# axarr[0, 1].set_title('GPU mask')
+# axarr[1, 0].imshow(mask.get() - mask_2.get())
+# axarr[1, 0].set_title('Difference of masks')
+# axarr[1, 1].imshow(mask.get() - mask_2.get())
+# axarr[1, 1].set_title('Difference of masks')
+
+# plt.show()
 
 
         
diff --git a/httomolibgpu/cuda_kernels/center_360_shifts.cu b/httomolibgpu/cuda_kernels/center_360_shifts.cu
@@ -1,7 +1,7 @@
 #include <cupy/complex.cuh>
 
 extern "C" __global__ void
-shift_whole_shifts(const float *sino2, const float *sino3,
+shift_whole_shifts(const float *flip_sino, const float *comp_sino,
                    const float *__restrict__ list_shift, float *mat, int nx,
                    int nymat) {
   int xid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -17,14 +17,14 @@ shift_whole_shifts(const float *sino2, const float *sino3,
   float frac_part = modf(shift_col, &int_part);
   if (abs(frac_part) > 1e-5f) {
     // we have a floating point shift, so we only roll in
-    // sino3, but we leave the rest for later using scipy
+    // comp_sino, but we leave the rest for later using scipy
     int shift_int =
         shift_col >= 0.0 ? int(ceil(shift_col)) : int(floor(shift_col));
     if (shift_int >= 0 && xid < shift_int) {
-      mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
+      mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
     }
     if (shift_int < 0 && xid >= nx + shift_int) {
-      mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
+      mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
     }
   } else {
     // we have an integer shift, so we can roll in directly
@@ -33,16 +33,16 @@ shift_whole_shifts(const float *sino2, const float *sino3,
     if (shift_int >= 0) {
       if (xid >= shift_int) {
         mat[zid * nymat * nx + yid * nx + xid] =
-            sino2[yid * nx + xid - shift_int];
+            flip_sino[yid * nx + xid - shift_int];
       } else {
-        mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
+        mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
       }
     } else {
       if (xid < nx + shift_int) {
         mat[zid * nymat * nx + yid * nx + xid] =
-            sino2[yid * nx + xid - shift_int];
+            flip_sino[yid * nx + xid - shift_int];
       } else {
-        mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
+        mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
       }
     }
   }
diff --git a/httomolibgpu/cuda_kernels/generate_mask.cu b/httomolibgpu/cuda_kernels/generate_mask.cu
@@ -50,11 +50,11 @@ extern "C" __global__ void generate_mask(const int ncol, const int nrow,
   mask[j * (ncol/2+1) + outi] = outval;
 }
 
-extern "C" __global__ void generate_mask_new(const int ncol, const int nrow,
-                                             const int cen_col, const int cen_row,
-                                             const float du, const float dv,
-                                             const float radius, const float drop,
-                                             float *mask) {
+extern "C" __global__ void generate_mask_full(const int ncol, const int nrow,
+                                              const int cen_col, const int cen_row,
+                                              const float du, const float dv,
+                                              const float radius, const float drop,
+                                              float *mask) {
   int i = blockDim.x * blockIdx.x + threadIdx.x;
   int j = blockIdx.y;
 
diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py
@@ -229,31 +229,16 @@ def _search_fine(sino, srad, step, init_cen, ratio, drop):
     flip_sino = cp.ascontiguousarray(cp.fliplr(sino))
     comp_sino = cp.ascontiguousarray(cp.flipud(sino))
     mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
-    # mask = cp.asarray(mask, dtype=cp.float32)
 
     cen_fliplr = (ncol - 1.0) / 2.0
-    # NOTE: those are different to new implementation
-    # srad = max(min(abs(float(srad)), ncol / 4.0), 1.0)
-    # step = max(min(abs(step), srad), 0.1)
     srad = np.clip(np.abs(srad), 1, ncol // 10 - 1)
     step = np.clip(np.abs(step), 0.1, 1.1)
     init_cen = np.clip(init_cen, srad, ncol - srad - 1)
     list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=cp.float32)
     list_shift = 2.0 * (list_cor - cen_fliplr)
     list_metric = cp.empty(list_shift.shape, dtype="float32")
 
-    for i, shift_l in enumerate(list_shift):
-        sino_shift = shift(flip_sino, (0, shift_l), order=3, prefilter=True)
-        if shift_l >= 0:
-            shift_int = int(cp.ceil(shift_l))
-            sino_shift[:, :shift_int] = comp_sino[:, :shift_int]
-        else:
-            shift_int = int(cp.floor(shift_l))
-            sino_shift[:, shift_int:] = comp_sino[:, shift_int:]
-        mat1 = cp.vstack((sino, sino_shift))
-        list_metric[i] = cp.mean(cp.abs(fftshift(fft2(mat1))) * mask)
-
-    # _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric)
+    _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric)
     cor = list_cor[cp.argmin(list_metric)]
     return cor
 
@@ -273,6 +258,35 @@ def _create_mask_numpy(nrow, ncol, radius, drop):
     mask[:, cen_col - 1 : cen_col + 2] = 0.0
     return mask
 
+def _create_mask_half(nrow, ncol, radius, drop):  # returns a (nrow, ncol // 2 + 1) uint16 CuPy array produced by the "generate_mask" CUDA kernel
+    du = 1.0 / ncol
+    dv = (nrow - 1.0) / (nrow * 2.0 * np.pi)
+    cen_row = int(math.ceil(nrow / 2.0) - 1)  # zero-based centre index: ceil(n / 2) - 1
+    cen_col = int(math.ceil(ncol / 2.0) - 1)
+    drop = min([drop, int(math.ceil(0.05 * nrow))])  # cap drop at ceil(5% of nrow)
+
+    block_x = 128  # threads per block along x
+    block_y = 1
+    block_dims = (block_x, block_y)
+    grid_x = (ncol // 2 + 1 + block_x - 1) // block_x  # ceil((ncol // 2 + 1) / block_x): enough blocks to span one mask row
+    grid_y = nrow  # one grid row per mask row
+    grid_dims = (grid_x, grid_y)
+    mask = cp.empty((nrow, ncol // 2 + 1), dtype="uint16")  # uninitialised output buffer; presumably every element is written by the kernel - TODO confirm
+    params = (  # positional kernel arguments; scalars du/dv/radius/drop are cast to float32 here
+        ncol,
+        nrow,
+        cen_col,
+        cen_row,
+        cp.float32(du),
+        cp.float32(dv),
+        cp.float32(radius),
+        cp.float32(drop),
+        mask,
+    )
+    module = load_cuda_module("generate_mask")
+    kernel = module.get_function("generate_mask")  # the half-width kernel, as opposed to "generate_mask_full" used by _create_mask
+    kernel(grid_dims, block_dims, params)
+    return mask
 
 def _create_mask(nrow, ncol, radius, drop):
     du = 1.0 / ncol
@@ -300,7 +314,7 @@ def _create_mask(nrow, ncol, radius, drop):
         mask,
     )
     module = load_cuda_module("generate_mask")
-    kernel = module.get_function("generate_mask_new")
+    kernel = module.get_function("generate_mask_full")
     kernel(grid_dims, block_dims, params)
     return mask
 
@@ -344,28 +358,30 @@ def _calculate_chunks(
     return stop_idx
 
 
-def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
+def _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out):
     # this tries to simplify - if shift_col is integer, no need to spline interpolate
     assert list_shift.dtype == cp.float32, "shifts must be single precision floats"
-    assert sino1.dtype == cp.float32, "sino1 must be float32"
-    assert sino2.dtype == cp.float32, "sino1 must be float32"
-    assert sino3.dtype == cp.float32, "sino1 must be float32"
-    assert out.dtype == cp.float32, "sino1 must be float32"
-    assert sino2.flags["C_CONTIGUOUS"], "sino2 must be C-contiguous"
-    assert sino3.flags["C_CONTIGUOUS"], "sino3 must be C-contiguous"
+    assert sino.dtype == cp.float32, "sino must be float32"
+    assert flip_sino.dtype == cp.float32, "flip_sino must be float32"
+    assert comp_sino.dtype == cp.float32, "comp_sino must be float32"
+    assert out.dtype == cp.float32, "out must be float32"
+    assert flip_sino.flags["C_CONTIGUOUS"], "flip_sino must be C-contiguous"
+    assert comp_sino.flags["C_CONTIGUOUS"], "comp_sino must be C-contiguous"
     assert list_shift.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous"
     nshifts = list_shift.shape[0]
-    na1 = sino1.shape[0]
-    na2 = sino2.shape[0]
+    na1 = sino.shape[0]
+    na2 = flip_sino.shape[0]
 
     module = load_cuda_module("center_360_shifts")
     shift_whole_shifts = module.get_function("shift_whole_shifts")
     # note: we don't have to calculate the mean here, as we're only looking for minimum metric.
     # The sum is enough.
     masked_sum_abs_kernel = cp.ReductionKernel(
-        in_params="complex64 x, uint16 mask",  # input, complex + mask
+        in_params="complex64 x, float32 mask",  # input, complex + mask
+        # in_params="complex64 x, uint16 mask",  # input, complex + mask
         out_params="float32 out",  # output, real
-        map_expr="mask ? abs(x) : 0.0f",
+        map_expr="abs(x) * mask",
+        # map_expr="mask ? abs(x) : 0.0f",
         reduce_expr="a + b",
         post_map_expr="out = a",
         identity="0.0f",
@@ -376,13 +392,14 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
     # determine how many shifts we can fit in the available memory
     # and iterate in chunks
     chunks = _calculate_chunks(
-        nshifts, (na1 + na2) * sino2.shape[1] * cp.float32().nbytes
+        nshifts, (na1 + na2) * flip_sino.shape[1] * cp.float32().nbytes
     )
 
-    mat = cp.empty((chunks[0], na1 + na2, sino2.shape[1]), dtype=cp.float32)
-    mat[:, :na1, :] = sino1
+    mat = cp.empty((chunks[0], na1 + na2, flip_sino.shape[1]), dtype=cp.float32)
+    mat[:, :na1, :] = sino
+
     # explicitly create FFT plan here, so it's not cached and clearly re-used
-    plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="R2C")
+    plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="C2C")
 
     for i, stop_idx in enumerate(chunks):
         if i > 0:
@@ -394,18 +411,18 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
         size = stop_idx - start_idx
 
         # first, handle the integer shifts without spline in a raw kernel,
-        # and shift in the sino3 one accordingly
+        # and shift in the comp_sino one accordingly
         bx = 128
-        gx = (sino3.shape[1] + bx - 1) // bx
+        gx = (comp_sino.shape[1] + bx - 1) // bx
         shift_whole_shifts(
             grid=(gx, na2, size),  ####
             block=(bx, 1, 1),
             args=(
-                sino2,
-                sino3,
+                flip_sino,
+                comp_sino,
                 list_shift[start_idx:stop_idx],
                 mat[:, na1:, :],
-                sino3.shape[1],
+                comp_sino.shape[1],
                 na1 + na2,
             ),
         )
@@ -415,7 +432,7 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
         for i in range(list_shift_host.shape[0]):
             shift_col = float(list_shift_host[i])
             if not shift_col.is_integer():
-                shifted = shift(sino2, (0, shift_col), order=3, prefilter=True)
+                shifted = shift(flip_sino, (0, shift_col), order=3, prefilter=True)
                 shift_int = round_up(shift_col)
                 if shift_int >= 0:
                     mat[i, na1:, shift_int:] = shifted[:, shift_int:]
@@ -425,7 +442,9 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
         # stack and transform
         # (we do the full sized mat FFT, even though the last chunk may be smaller, to
         # make sure we can re-use the same FFT plan as before)
-        mat_freq = rfft2(mat, axes=(1, 2), norm=None, plan=plan)
+        # mat_freq = fft2(mat, axes=(1, 2), norm=None, plan=plan)
+        mat_freq = fftshift(fft2(mat, axes=(1, 2), norm=None, plan=plan), axes=(1, 2))
+
         masked_sum_abs_kernel(
             mat_freq[:size, :, :], mask, out=out[start_idx:stop_idx], axis=(1, 2)
         )