Skip to content

Commit cb53c09

Browse files
author
Martin Benes
committed
python hill accellerated
1 parent 3888be7 commit cb53c09

File tree

4 files changed

+104
-45
lines changed

4 files changed

+104
-45
lines changed

conseal/hill/_costmap.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
def _compute_cost(
2020
x0: np.ndarray,
21+
separable: bool = True,
2122
) -> np.ndarray:
2223
"""Computes HILL cost.
2324
@@ -55,12 +56,22 @@ def _compute_cost(
5556
)
5657

5758
# low-pass filter 2
58-
L2 = np.ones((15, 15), dtype='float32')/15**2
59-
I2[I2 < tools.EPS32] = tools.EPS32
60-
I3 = scipy.signal.convolve2d(
61-
1./(I2), L2,
62-
mode='same', boundary='symm',
63-
)
59+
if separable:
60+
L2 = np.ones((15,), dtype='float64') / 15
61+
I2[I2 < tools.EPS32] = tools.EPS32
62+
tmp = scipy.signal.convolve2d(
63+
1./(I2), L2[:, None], mode="same", boundary="symm"
64+
)
65+
I3 = scipy.signal.convolve2d(
66+
tmp, L2[None, :], mode="same", boundary="symm"
67+
)
68+
else:
69+
L2 = np.ones((15, 15), dtype='float64')/15**2
70+
I2[I2 < tools.EPS32] = tools.EPS32
71+
I3 = scipy.signal.convolve2d(
72+
1./(I2), L2,
73+
mode='same', boundary='symm',
74+
)
6475

6576
#
6677
return I3

run.py

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,51 @@
1717

1818
x0 = np.array(Image.open('test/assets/cover/uncompressed_gray/seal1.png'))
1919

20-
with cl.BACKEND_PYTHON:
21-
import time
22-
start = time.perf_counter()
23-
rho2 = cl.wow._costmap.compute_cost(x0, separable=False)
24-
end = time.perf_counter()
25-
print('Py 2D:', end - start)
26-
start = time.perf_counter()
27-
rho1 = cl.wow._costmap.compute_cost(x0, separable=True)
28-
end = time.perf_counter()
29-
print('Py 2x1D:', end - start)
30-
31-
with cl.BACKEND_RUST:
32-
start = time.perf_counter()
33-
rho3 = cl.wow._costmap.compute_cost(x0)
34-
end = time.perf_counter()
35-
print('Rs 2x1D:', end - start)
36-
37-
38-
np.testing.assert_array_equal(rho1, rho2)
39-
40-
exit()
20+
# with cl.BACKEND_PYTHON:
21+
# import time
22+
# start = time.perf_counter()
23+
# rho1 = cl.hill._costmap._compute_cost(x0, separable=False)
24+
# end = time.perf_counter()
25+
# print('Py:', end - start)
26+
# start = time.perf_counter()
27+
# rho2 = cl.hill._costmap._compute_cost(x0, separable=True)
28+
# end = time.perf_counter()
29+
# print('Py sep:', end - start)
30+
31+
# with cl.BACKEND_RUST:
32+
# start = time.perf_counter()
33+
# rho3 = cl.hill._costmap.compute_cost(x0)
34+
# end = time.perf_counter()
35+
# print('Rs:', end - start)
36+
37+
# np.testing.assert_allclose(rho1, rho2, rtol=1e-5)
38+
# np.testing.assert_allclose(rho2, rho3, rtol=1e-5)
39+
# # exit()
40+
41+
42+
43+
# with cl.BACKEND_PYTHON:
44+
# import time
45+
# start = time.perf_counter()
46+
# rho2 = cl.wow._costmap.compute_cost(x0, separable=False)
47+
# end = time.perf_counter()
48+
# print('Py 2D:', end - start)
49+
# start = time.perf_counter()
50+
# rho1 = cl.wow._costmap.compute_cost(x0, separable=True)
51+
# end = time.perf_counter()
52+
# print('Py 2x1D:', end - start)
53+
54+
# with cl.BACKEND_RUST:
55+
# start = time.perf_counter()
56+
# rho3 = cl.wow._costmap.compute_cost(x0)
57+
# end = time.perf_counter()
58+
# print('Rs 2x1D:', end - start)
59+
60+
61+
# np.testing.assert_allclose(rho1, rho2, rtol=1e-5)
62+
# np.testing.assert_allclose(rho1, rho3, rtol=1e-5)
63+
64+
# exit()
4165

4266
# x0 = np.array(Image.open('test/assets/cover/uncompressed_gray/seal1.png'))
4367

src/hill.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
use pyo3::prelude::*;
33
// use pyo3::types::PyDict;
4-
use numpy::{PyArray2, PyReadonlyArray2, ndarray::Array};
4+
use numpy::{PyArray2, PyReadonlyArray2, ndarray::Array, ndarray::s};
55

66

77
/// Computes HILL cost.
@@ -72,6 +72,30 @@ fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>) -> PyResult
7272
}
7373
}
7474

75+
// // convolve with AVG 15x15 (separated)
76+
// let mut tmp = Array::<f32, _>::zeros((h, w));
77+
// for i in 7..I2.nrows()-7 {
78+
// for j in 0..I2.ncols() {
79+
// //
80+
// let mut sum = 0.0;
81+
// for offset in -7i32..=7i32 {
82+
// sum += I2[[(i as i32 + offset) as usize, j]];
83+
// }
84+
// tmp[[i, 7]] = sum / 15.0;
85+
// }
86+
// }
87+
// let mut cost = Array::<f32, _>::zeros((h, w));
88+
// for i in 0..tmp.nrows() {
89+
// for j in 7..I2.ncols()-7 {
90+
// //
91+
// let mut sum = 0.0;
92+
// for offset in -7i32..=7i32 {
93+
// sum += tmp[[i, (j as i32 + offset) as usize]];
94+
// }
95+
// tmp[[7, j]] = sum / 15.0;
96+
// }
97+
// }
98+
7599
// convolve with AVG 15x15
76100
let mut cost = Array::<f32, _>::zeros((h, w));
77101
for i in 7..I2.nrows()-7 {

src/wow.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@ use numpy::{ndarray::Array, ndarray::Array1, ndarray::Array2, ndarray::Array3, n
88

99

1010
// ---------- internal helper, NOT exposed to Python ----------
11-
fn daubechies8() -> (Array3<f64>, Vec<(Array1<f64>, Array1<f64>)>) {
12-
let hpdf: [f64; 16] = [
11+
fn daubechies8() -> (Array3<f32>, Vec<(Array1<f32>, Array1<f32>)>) {
12+
let hpdf: [f32; 16] = [
1313
-0.0544158422, 0.3128715909, -0.6756307363, 0.5853546837,
1414
0.0158291053, -0.2840155430, -0.0004724846, 0.1287474266,
1515
0.0173693010, -0.0440882539, -0.0139810279, 0.0087460940,
1616
0.0048703530, -0.0003917404, -0.0006754494, -0.0001174768
1717
];
1818

1919
// build lpdf
20-
let mut lpdf = [0f64; 16];
20+
let mut lpdf = [0f32; 16];
2121
for i in 0..16 {
22-
lpdf[i] = ((-1f64).powi(i as i32)) * hpdf[15 - i];
22+
lpdf[i] = ((-1f32).powi(i as i32)) * hpdf[15 - i];
2323
}
2424

2525
let h = Array::from_shape_vec((16, 1), hpdf.to_vec()).unwrap();
@@ -49,9 +49,9 @@ fn reflect_index(i: isize, n: isize) -> isize {
4949
}
5050

5151
/// Symmetric pad a 2D array
52-
fn pad_symmetric(input: &Array2<f64>, pad_v: usize, pad_h: usize) -> Array2<f64> {
52+
fn pad_symmetric(input: &Array2<f32>, pad_v: usize, pad_h: usize) -> Array2<f32> {
5353
let (h, w) = input.dim();
54-
let mut output = Array2::<f64>::zeros((h + 2*pad_v, w + 2*pad_h));
54+
let mut output = Array2::<f32>::zeros((h + 2*pad_v, w + 2*pad_h));
5555

5656
for i in 0..output.nrows() {
5757
for j in 0..output.ncols() {
@@ -64,18 +64,18 @@ fn pad_symmetric(input: &Array2<f64>, pad_v: usize, pad_h: usize) -> Array2<f64>
6464
}
6565

6666
/// 2D convolution with symmetric padding and mode='same'
67-
fn convolve2d(input: &Array2<f64>, kernel: &Array2<f64>) -> Array2<f64> {
67+
fn convolve2d(input: &Array2<f32>, kernel: &Array2<f32>) -> Array2<f32> {
6868
let (h, w) = input.dim();
6969
let (kh, kw) = kernel.dim();
7070
let pad_h = kh / 2;
7171
let pad_w = kw / 2;
7272
let pad = pad_h.max(pad_w);
7373
let input_pad = pad_symmetric(input, pad_h, pad_w);
7474

75-
let mut output = Array2::<f64>::zeros((h, w));
75+
let mut output = Array2::<f32>::zeros((h, w));
7676
for i in 0..h {
7777
for j in 0..w {
78-
let mut sum = 0.0f64;
78+
let mut sum = 0.0f32;
7979
for u in 0..kh {
8080
for v in 0..kw {
8181
let x = i + u;
@@ -92,13 +92,13 @@ fn convolve2d(input: &Array2<f64>, kernel: &Array2<f64>) -> Array2<f64> {
9292

9393

9494

95-
fn convolve1d_horizontal(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
95+
fn convolve1d_horizontal(input: &Array2<f32>, kernel: &[f32]) -> Array2<f32> {
9696
let (h, w) = input.dim();
9797
let k = kernel.len();
9898
let pad = k / 2;
9999
let input_pad = pad_symmetric(input, 0, pad);
100100

101-
let mut out = Array2::<f64>::zeros((h, w));
101+
let mut out = Array2::<f32>::zeros((h, w));
102102

103103
for i in 0..h {
104104
for j in 0..w {
@@ -113,7 +113,7 @@ fn convolve1d_horizontal(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
113113
out
114114
}
115115

116-
fn convolve1d_vertical(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
116+
fn convolve1d_vertical(input: &Array2<f32>, kernel: &[f32]) -> Array2<f32> {
117117
// transpose the input
118118
let input_t = input.t();
119119
let mut tmp = convolve1d_horizontal(&input_t.to_owned(), kernel);
@@ -135,10 +135,10 @@ fn convolve1d_vertical(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
135135
// #[pyo3(signature = (x0))]
136136
#[pyfunction]
137137
#[pyo3(signature = (x0, p = -1.0))]
138-
fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f64)
139-
-> PyResult<Py<PyArray2<f64>>> {
138+
fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f32)
139+
-> PyResult<Py<PyArray2<f32>>> {
140140

141-
let input = x0.as_array().mapv(|v| v as f64);
141+
let input = x0.as_array().mapv(|v| v as f32);
142142
let (h, w) = input.dim();
143143
let mut x0_pad = pad_symmetric(&input, 16 as usize, 16 as usize);
144144

@@ -191,14 +191,14 @@ fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f64)
191191
xi.push(x_crop);
192192
}
193193

194-
// convert xi Vec<Array2<f64>> into a single Array3<f64> of shape (3, h, w)
194+
// convert xi Vec<Array2<f32>> into a single Array3<f32> of shape (3, h, w)
195195
let xi_3d = Array3::from_shape_vec(
196196
(3, h, w),
197197
xi.into_iter().flat_map(|arr| arr.into_raw_vec()).collect()
198198
).unwrap();
199199

200200
// compute sum over channels of xi_i^p
201-
let rho = xi_3d.mapv(|v| v.max(f64::EPSILON)).mapv(|v| v.powf(p)).sum_axis(Axis(0)).mapv(|v| v.powf(-1.0f64 / p));
201+
let rho = xi_3d.mapv(|v| v.max(f32::EPSILON)).mapv(|v| v.powf(p)).sum_axis(Axis(0)).mapv(|v| v.powf(-1.0f32 / p));
202202
Ok(PyArray2::from_owned_array(py, rho).into())
203203
}
204204

0 commit comments

Comments
 (0)