Skip to content

Commit 159af7d

Browse files
committed
WIP
1 parent 54fcb86 commit 159af7d

File tree

4 files changed

+99
-112
lines changed

4 files changed

+99
-112
lines changed

docs/source/metrics_calculate.py

Lines changed: 28 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from cupyx.scipy.fft import rfft2, fft2, fftshift
88
import httomolibgpu
99
from httomolibgpu.cuda_kernels import load_cuda_module
10+
from httomolibgpu.prep.normalize import normalize
1011
import httomolibgpu.recon.rotation
1112

1213
import matplotlib.pyplot as plt
@@ -95,81 +96,48 @@ def _create_mask_new(nrow, ncol, radius, drop):
9596
# Load the sinogram data
9697
path_lib = os.path.dirname(httomolibgpu.__file__)
9798
in_file = os.path.abspath(
98-
os.path.join(path_lib, "..", "tests/test_data/", "3600proj_sino.npz")
99+
os.path.join(path_lib, "..", "tests/test_data/", "i12LFOV.npz")
99100
)
100101
l_infile = np.load(in_file)
101-
sinogram = l_infile["sinogram"]
102-
angles = l_infile["angles"]
103-
sinogram = cp.asarray(sinogram)
104102

105-
sino_shape = sinogram.shape
103+
projdata = cp.asarray(l_infile["projdata"])
104+
flats = cp.asarray(l_infile["flats"])
105+
darks = cp.asarray(l_infile["darks"])
106+
del l_infile
106107

107-
print("The shape of the sinogram stack is {}".format(sino_shape))
108+
data_normalised = normalize(projdata, flats, darks, minus_log=False)
109+
del flats, darks, projdata
108110

109-
flip_sino = cp.ascontiguousarray(cp.fliplr(sinogram))
110-
comp_sino = cp.ascontiguousarray(cp.flipud(sinogram))
111+
spec = importlib.util.spec_from_file_location("rotation_new", "C:/Work/DiamondLightSource/httomolibgpu/docs/source/rotation_new.py")
112+
rotation_new = importlib.util.module_from_spec(spec)
113+
sys.modules["rotation_new"] = rotation_new
114+
spec.loader.exec_module(rotation_new)
111115

112-
(nrow, ncol) = sinogram.shape
113-
ratio = 0.5
114-
drop = 20
115-
smin = -100
116-
smax = 100
116+
# --- Running the centre of rotation algorithm ---#
117+
mid_slice = data_normalised.shape[1] // 2
117118

118-
mask = _create_mask_numpy(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
119-
120-
mask = cp.asarray(mask, dtype=cp.float32)
121-
cen_fliplr = (ncol - 1.0) / 2.0
122-
start_cor, stop_cor = np.sort((smin, smax))
123-
start_cor = np.int16(np.clip(start_cor, 0, ncol - 1))
124-
stop_cor = np.int16(np.clip(stop_cor, 0, ncol - 1))
125-
list_cor = cp.arange(start_cor, stop_cor + 1.0, dtype=cp.float32)
126-
list_shift = 2.0 * (list_cor - cen_fliplr)
127-
list_metric = cp.empty(list_shift.shape, dtype=cp.float32)
128-
129-
print(list_shift)
130-
131-
sino_sino = cp.vstack((sinogram, flip_sino))
132-
for i, shift in enumerate(list_shift):
133-
_sino = sino_sino[nrow:]
134-
_sino[...] = cp.roll(flip_sino, int(shift), axis=1)
135-
if shift >= 0:
136-
_sino[:, :shift] = comp_sino[:, :shift]
137-
else:
138-
_sino[:, shift:] = comp_sino[:, shift:]
139-
list_metric[i] = cp.mean(cp.abs(fftshift(fft2(sino_sino))) * mask)
140-
141-
print(list_metric)
142-
143-
spec = importlib.util.spec_from_file_location("rotation_new", "C:\Work\DiamondLightSource\httomolibgpu\docs\source\metrics_calculate.py")
144-
foo = importlib.util.module_from_spec(spec)
145-
sys.modules["rotation_new"] = foo
146-
spec.loader.exec_module(foo)
147-
foo.MyClass()
148-
149-
import docs.source.rotation_new as rotation_new
150-
151-
rotation_value = rotation.find_center_vo();
152-
new_rotation_value = rotation_new.find_center_vo();
119+
rotation_value = rotation.find_center_vo(data_normalised[:, mid_slice, :]);
120+
new_rotation_value = rotation_new.find_center_vo(data_normalised[:, mid_slice, :]);
153121

154122
print(rotation_value)
155123
print(new_rotation_value)
156124

157125
#subplot(r,c) provide the no. of rows and columns
158-
f, axarr = plt.subplots(2,2)
126+
# f, axarr = plt.subplots(2,2)
159127

160-
mask_2 = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
128+
# mask_2 = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
161129

162130
# use the created array to output your multiple images. In this case I have stacked 4 images vertically
163-
axarr[0, 0].imshow(mask.get())
164-
axarr[0, 0].set_title('Original mask')
165-
axarr[0, 1].imshow(mask_2.get())
166-
axarr[0, 1].set_title('GPU mask')
167-
axarr[1, 0].imshow(mask.get() - mask_2.get())
168-
axarr[1, 0].set_title('Difference of masks')
169-
axarr[1, 1].imshow(mask.get() - mask_2.get())
170-
axarr[1, 1].set_title('Difference of masks')
171-
172-
plt.show()
131+
# axarr[0, 0].imshow(mask.get())
132+
# axarr[0, 0].set_title('Original mask')
133+
# axarr[0, 1].imshow(mask_2.get())
134+
# axarr[0, 1].set_title('GPU mask')
135+
# axarr[1, 0].imshow(mask.get() - mask_2.get())
136+
# axarr[1, 0].set_title('Difference of masks')
137+
# axarr[1, 1].imshow(mask.get() - mask_2.get())
138+
# axarr[1, 1].set_title('Difference of masks')
139+
140+
# plt.show()
173141

174142

175143

httomolibgpu/cuda_kernels/center_360_shifts.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <cupy/complex.cuh>
22

33
extern "C" __global__ void
4-
shift_whole_shifts(const float *sino2, const float *sino3,
4+
shift_whole_shifts(const float *flip_sino, const float *comp_sino,
55
const float *__restrict__ list_shift, float *mat, int nx,
66
int nymat) {
77
int xid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -17,14 +17,14 @@ shift_whole_shifts(const float *sino2, const float *sino3,
1717
float frac_part = modf(shift_col, &int_part);
1818
if (abs(frac_part) > 1e-5f) {
1919
// we have a floating point shift, so we only roll in
20-
// sino3, but we leave the rest for later using scipy
20+
// comp_sino, but we leave the rest for later using scipy
2121
int shift_int =
2222
shift_col >= 0.0 ? int(ceil(shift_col)) : int(floor(shift_col));
2323
if (shift_int >= 0 && xid < shift_int) {
24-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
24+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
2525
}
2626
if (shift_int < 0 && xid >= nx + shift_int) {
27-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
27+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
2828
}
2929
} else {
3030
// we have an integer shift, so we can roll in directly
@@ -33,16 +33,16 @@ shift_whole_shifts(const float *sino2, const float *sino3,
3333
if (shift_int >= 0) {
3434
if (xid >= shift_int) {
3535
mat[zid * nymat * nx + yid * nx + xid] =
36-
sino2[yid * nx + xid - shift_int];
36+
flip_sino[yid * nx + xid - shift_int];
3737
} else {
38-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
38+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
3939
}
4040
} else {
4141
if (xid < nx + shift_int) {
4242
mat[zid * nymat * nx + yid * nx + xid] =
43-
sino2[yid * nx + xid - shift_int];
43+
flip_sino[yid * nx + xid - shift_int];
4444
} else {
45-
mat[zid * nymat * nx + yid * nx + xid] = sino3[yid * nx + xid];
45+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
4646
}
4747
}
4848
}

httomolibgpu/cuda_kernels/generate_mask.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ extern "C" __global__ void generate_mask(const int ncol, const int nrow,
5050
mask[j * (ncol/2+1) + outi] = outval;
5151
}
5252

53-
extern "C" __global__ void generate_mask_new(const int ncol, const int nrow,
54-
const int cen_col, const int cen_row,
55-
const float du, const float dv,
56-
const float radius, const float drop,
57-
float *mask) {
53+
extern "C" __global__ void generate_mask_full(const int ncol, const int nrow,
54+
const int cen_col, const int cen_row,
55+
const float du, const float dv,
56+
const float radius, const float drop,
57+
float *mask) {
5858
int i = blockDim.x * blockIdx.x + threadIdx.x;
5959
int j = blockIdx.y;
6060

httomolibgpu/recon/rotation.py

Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -229,31 +229,16 @@ def _search_fine(sino, srad, step, init_cen, ratio, drop):
229229
flip_sino = cp.ascontiguousarray(cp.fliplr(sino))
230230
comp_sino = cp.ascontiguousarray(cp.flipud(sino))
231231
mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
232-
# mask = cp.asarray(mask, dtype=cp.float32)
233232

234233
cen_fliplr = (ncol - 1.0) / 2.0
235-
# NOTE: those are different to new implementation
236-
# srad = max(min(abs(float(srad)), ncol / 4.0), 1.0)
237-
# step = max(min(abs(step), srad), 0.1)
238234
srad = np.clip(np.abs(srad), 1, ncol // 10 - 1)
239235
step = np.clip(np.abs(step), 0.1, 1.1)
240236
init_cen = np.clip(init_cen, srad, ncol - srad - 1)
241237
list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=cp.float32)
242238
list_shift = 2.0 * (list_cor - cen_fliplr)
243239
list_metric = cp.empty(list_shift.shape, dtype="float32")
244240

245-
for i, shift_l in enumerate(list_shift):
246-
sino_shift = shift(flip_sino, (0, shift_l), order=3, prefilter=True)
247-
if shift_l >= 0:
248-
shift_int = int(cp.ceil(shift_l))
249-
sino_shift[:, :shift_int] = comp_sino[:, :shift_int]
250-
else:
251-
shift_int = int(cp.floor(shift_l))
252-
sino_shift[:, shift_int:] = comp_sino[:, shift_int:]
253-
mat1 = cp.vstack((sino, sino_shift))
254-
list_metric[i] = cp.mean(cp.abs(fftshift(fft2(mat1))) * mask)
255-
256-
# _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric)
241+
_calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out=list_metric)
257242
cor = list_cor[cp.argmin(list_metric)]
258243
return cor
259244

@@ -273,6 +258,35 @@ def _create_mask_numpy(nrow, ncol, radius, drop):
273258
mask[:, cen_col - 1 : cen_col + 2] = 0.0
274259
return mask
275260

261+
def _create_mask_half(nrow, ncol, radius, drop):
262+
du = 1.0 / ncol
263+
dv = (nrow - 1.0) / (nrow * 2.0 * np.pi)
264+
cen_row = int(math.ceil(nrow / 2.0) - 1)
265+
cen_col = int(math.ceil(ncol / 2.0) - 1)
266+
drop = min([drop, int(math.ceil(0.05 * nrow))])
267+
268+
block_x = 128
269+
block_y = 1
270+
block_dims = (block_x, block_y)
271+
grid_x = (ncol // 2 + 1 + block_x - 1) // block_x
272+
grid_y = nrow
273+
grid_dims = (grid_x, grid_y)
274+
mask = cp.empty((nrow, ncol // 2 + 1), dtype="uint16")
275+
params = (
276+
ncol,
277+
nrow,
278+
cen_col,
279+
cen_row,
280+
cp.float32(du),
281+
cp.float32(dv),
282+
cp.float32(radius),
283+
cp.float32(drop),
284+
mask,
285+
)
286+
module = load_cuda_module("generate_mask")
287+
kernel = module.get_function("generate_mask")
288+
kernel(grid_dims, block_dims, params)
289+
return mask
276290

277291
def _create_mask(nrow, ncol, radius, drop):
278292
du = 1.0 / ncol
@@ -300,7 +314,7 @@ def _create_mask(nrow, ncol, radius, drop):
300314
mask,
301315
)
302316
module = load_cuda_module("generate_mask")
303-
kernel = module.get_function("generate_mask_new")
317+
kernel = module.get_function("generate_mask_full")
304318
kernel(grid_dims, block_dims, params)
305319
return mask
306320

@@ -344,28 +358,30 @@ def _calculate_chunks(
344358
return stop_idx
345359

346360

347-
def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
361+
def _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out):
348362
# this tries to simplify - if shift_col is integer, no need to spline interpolate
349363
assert list_shift.dtype == cp.float32, "shifts must be single precision floats"
350-
assert sino1.dtype == cp.float32, "sino1 must be float32"
351-
assert sino2.dtype == cp.float32, "sino1 must be float32"
352-
assert sino3.dtype == cp.float32, "sino1 must be float32"
353-
assert out.dtype == cp.float32, "sino1 must be float32"
354-
assert sino2.flags["C_CONTIGUOUS"], "sino2 must be C-contiguous"
355-
assert sino3.flags["C_CONTIGUOUS"], "sino3 must be C-contiguous"
364+
assert sino.dtype == cp.float32, "sino must be float32"
365+
assert flip_sino.dtype == cp.float32, "flip_sino must be float32"
366+
assert comp_sino.dtype == cp.float32, "comp_sino must be float32"
367+
assert out.dtype == cp.float32, "out must be float32"
368+
assert flip_sino.flags["C_CONTIGUOUS"], "flip_sino must be C-contiguous"
369+
assert comp_sino.flags["C_CONTIGUOUS"], "comp_sino must be C-contiguous"
356370
assert list_shift.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous"
357371
nshifts = list_shift.shape[0]
358-
na1 = sino1.shape[0]
359-
na2 = sino2.shape[0]
372+
na1 = sino.shape[0]
373+
na2 = flip_sino.shape[0]
360374

361375
module = load_cuda_module("center_360_shifts")
362376
shift_whole_shifts = module.get_function("shift_whole_shifts")
363377
# note: we don't have to calculate the mean here, as we're only looking for minimum metric.
364378
# The sum is enough.
365379
masked_sum_abs_kernel = cp.ReductionKernel(
366-
in_params="complex64 x, uint16 mask", # input, complex + mask
380+
in_params="complex64 x, float32 mask", # input, complex + mask
381+
# in_params="complex64 x, uint16 mask", # input, complex + mask
367382
out_params="float32 out", # output, real
368-
map_expr="mask ? abs(x) : 0.0f",
383+
map_expr="abs(x) * mask",
384+
# map_expr="mask ? abs(x) : 0.0f",
369385
reduce_expr="a + b",
370386
post_map_expr="out = a",
371387
identity="0.0f",
@@ -376,13 +392,14 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
376392
# determine how many shifts we can fit in the available memory
377393
# and iterate in chunks
378394
chunks = _calculate_chunks(
379-
nshifts, (na1 + na2) * sino2.shape[1] * cp.float32().nbytes
395+
nshifts, (na1 + na2) * flip_sino.shape[1] * cp.float32().nbytes
380396
)
381397

382-
mat = cp.empty((chunks[0], na1 + na2, sino2.shape[1]), dtype=cp.float32)
383-
mat[:, :na1, :] = sino1
398+
mat = cp.empty((chunks[0], na1 + na2, flip_sino.shape[1]), dtype=cp.float32)
399+
mat[:, :na1, :] = sino
400+
384401
# explicitly create FFT plan here, so it's not cached and clearly re-used
385-
plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="R2C")
402+
plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="C2C")
386403

387404
for i, stop_idx in enumerate(chunks):
388405
if i > 0:
@@ -394,18 +411,18 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
394411
size = stop_idx - start_idx
395412

396413
# first, handle the integer shifts without spline in a raw kernel,
397-
# and shift in the sino3 one accordingly
414+
# and shift in the comp_sino one accordingly
398415
bx = 128
399-
gx = (sino3.shape[1] + bx - 1) // bx
416+
gx = (comp_sino.shape[1] + bx - 1) // bx
400417
shift_whole_shifts(
401418
grid=(gx, na2, size), ####
402419
block=(bx, 1, 1),
403420
args=(
404-
sino2,
405-
sino3,
421+
flip_sino,
422+
comp_sino,
406423
list_shift[start_idx:stop_idx],
407424
mat[:, na1:, :],
408-
sino3.shape[1],
425+
comp_sino.shape[1],
409426
na1 + na2,
410427
),
411428
)
@@ -415,7 +432,7 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
415432
for i in range(list_shift_host.shape[0]):
416433
shift_col = float(list_shift_host[i])
417434
if not shift_col.is_integer():
418-
shifted = shift(sino2, (0, shift_col), order=3, prefilter=True)
435+
shifted = shift(flip_sino, (0, shift_col), order=3, prefilter=True)
419436
shift_int = round_up(shift_col)
420437
if shift_int >= 0:
421438
mat[i, na1:, shift_int:] = shifted[:, shift_int:]
@@ -425,7 +442,9 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
425442
# stack and transform
426443
# (we do the full sized mat FFT, even though the last chunk may be smaller, to
427444
# make sure we can re-use the same FFT plan as before)
428-
mat_freq = rfft2(mat, axes=(1, 2), norm=None, plan=plan)
445+
# mat_freq = fft2(mat, axes=(1, 2), norm=None, plan=plan)
446+
mat_freq = fftshift(fft2(mat, axes=(1, 2), norm=None, plan=plan), axes=(1, 2))
447+
429448
masked_sum_abs_kernel(
430449
mat_freq[:size, :, :], mask, out=out[start_idx:stop_idx], axis=(1, 2)
431450
)

0 commit comments

Comments
 (0)