python hill accelerated

Martin Benes · Martin Benes · commit cb53c090169e · 2025-12-04T09:14:28.000+01:00
diff --git a/conseal/hill/_costmap.py b/conseal/hill/_costmap.py
@@ -18,6 +18,7 @@
 
 def _compute_cost(
     x0: np.ndarray,
+    separable: bool = True,
 ) -> np.ndarray:
     """Computes HILL cost.
 
@@ -55,12 +56,22 @@ def _compute_cost(
     )
 
     # low-pass filter 2
-    L2 = np.ones((15, 15), dtype='float32')/15**2
-    I2[I2 < tools.EPS32] = tools.EPS32
-    I3 = scipy.signal.convolve2d(
-        1./(I2), L2,
-        mode='same', boundary='symm',
-    )
+    if separable:
+        L2 = np.ones((15,), dtype='float64') / 15
+        I2[I2 < tools.EPS32] = tools.EPS32
+        tmp = scipy.signal.convolve2d(
+            1./(I2), L2[:, None], mode="same", boundary="symm"
+        )
+        I3 = scipy.signal.convolve2d(
+            tmp, L2[None, :], mode="same", boundary="symm"
+        )
+    else:
+        L2 = np.ones((15, 15), dtype='float64')/15**2
+        I2[I2 < tools.EPS32] = tools.EPS32
+        I3 = scipy.signal.convolve2d(
+            1./(I2), L2,
+            mode='same', boundary='symm',
+        )
 
     #
     return I3
diff --git a/run.py b/run.py
@@ -17,27 +17,51 @@
 
 x0 = np.array(Image.open('test/assets/cover/uncompressed_gray/seal1.png'))
 
-with cl.BACKEND_PYTHON:
-    import time
-    start = time.perf_counter()
-    rho2 = cl.wow._costmap.compute_cost(x0, separable=False)
-    end = time.perf_counter()
-    print('Py 2D:', end - start)
-    start = time.perf_counter()
-    rho1 = cl.wow._costmap.compute_cost(x0, separable=True)
-    end = time.perf_counter()
-    print('Py 2x1D:', end - start)
-
-with cl.BACKEND_RUST:
-    start = time.perf_counter()
-    rho3 = cl.wow._costmap.compute_cost(x0)
-    end = time.perf_counter()
-    print('Rs 2x1D:', end - start)
-
-
-np.testing.assert_array_equal(rho1, rho2)
-
-exit()
+# with cl.BACKEND_PYTHON:
+#     import time
+#     start = time.perf_counter()
+#     rho1 = cl.hill._costmap._compute_cost(x0, separable=False)
+#     end = time.perf_counter()
+#     print('Py:', end - start)
+#     start = time.perf_counter()
+#     rho2 = cl.hill._costmap._compute_cost(x0, separable=True)
+#     end = time.perf_counter()
+#     print('Py sep:', end - start)
+
+# with cl.BACKEND_RUST:
+#     start = time.perf_counter()
+#     rho3 = cl.hill._costmap.compute_cost(x0)
+#     end = time.perf_counter()
+#     print('Rs:', end - start)
+
+# np.testing.assert_allclose(rho1, rho2, rtol=1e-5)
+# np.testing.assert_allclose(rho2, rho3, rtol=1e-5)
+# # exit()
+
+
+
+# with cl.BACKEND_PYTHON:
+#     import time
+#     start = time.perf_counter()
+#     rho2 = cl.wow._costmap.compute_cost(x0, separable=False)
+#     end = time.perf_counter()
+#     print('Py 2D:', end - start)
+#     start = time.perf_counter()
+#     rho1 = cl.wow._costmap.compute_cost(x0, separable=True)
+#     end = time.perf_counter()
+#     print('Py 2x1D:', end - start)
+
+# with cl.BACKEND_RUST:
+#     start = time.perf_counter()
+#     rho3 = cl.wow._costmap.compute_cost(x0)
+#     end = time.perf_counter()
+#     print('Rs 2x1D:', end - start)
+
+
+# np.testing.assert_allclose(rho1, rho2, rtol=1e-5)
+# np.testing.assert_allclose(rho1, rho3, rtol=1e-5)
+
+# exit()
 
 # x0 = np.array(Image.open('test/assets/cover/uncompressed_gray/seal1.png'))
 
diff --git a/src/hill.rs b/src/hill.rs
@@ -1,7 +1,7 @@
 
 use pyo3::prelude::*;
 // use pyo3::types::PyDict;
-use numpy::{PyArray2, PyReadonlyArray2, ndarray::Array};
+use numpy::{PyArray2, PyReadonlyArray2, ndarray::Array, ndarray::s};
 
 
 /// Computes HILL cost.
@@ -72,6 +72,30 @@ fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>) -> PyResult
         }
     }
 
+    // // convolve with AVG 15x15 (separated)
+    // let mut tmp = Array::<f32, _>::zeros((h, w));
+    // for i in 7..I2.nrows()-7 {
+    //     for j in 0..I2.ncols() {
+    //         //
+    //         let mut sum = 0.0;
+    //         for offset in -7i32..=7i32 {
+    //             sum += I2[[(i as i32 + offset) as usize, j]];
+    //         }
+    //         tmp[[i, 7]] = sum / 15.0;
+    //     }
+    // }
+    // let mut cost = Array::<f32, _>::zeros((h, w));
+    // for i in 0..tmp.nrows() {
+    //     for j in 7..I2.ncols()-7 {
+    //         //
+    //         let mut sum = 0.0;
+    //         for offset in -7i32..=7i32 {
+    //             sum += tmp[[i, (j as i32 + offset) as usize]];
+    //         }
+    //         tmp[[7, j]] = sum / 15.0;
+    //     }
+    // }
+
     // convolve with AVG 15x15
     let mut cost = Array::<f32, _>::zeros((h, w));
     for i in 7..I2.nrows()-7 {
diff --git a/src/wow.rs b/src/wow.rs
@@ -8,18 +8,18 @@ use numpy::{ndarray::Array, ndarray::Array1, ndarray::Array2, ndarray::Array3, n
 
 
 // ---------- internal helper, NOT exposed to Python ----------
-fn daubechies8() -> (Array3<f64>, Vec<(Array1<f64>, Array1<f64>)>) {
-    let hpdf: [f64; 16] = [
+fn daubechies8() -> (Array3<f32>, Vec<(Array1<f32>, Array1<f32>)>) {
+    let hpdf: [f32; 16] = [
         -0.0544158422,  0.3128715909, -0.6756307363,  0.5853546837,
          0.0158291053, -0.2840155430, -0.0004724846,  0.1287474266,
          0.0173693010, -0.0440882539, -0.0139810279,  0.0087460940,
          0.0048703530, -0.0003917404, -0.0006754494, -0.0001174768
     ];
 
     // build lpdf
-    let mut lpdf = [0f64; 16];
+    let mut lpdf = [0f32; 16];
     for i in 0..16 {
-        lpdf[i] = ((-1f64).powi(i as i32)) * hpdf[15 - i];
+        lpdf[i] = ((-1f32).powi(i as i32)) * hpdf[15 - i];
     }
 
     let h = Array::from_shape_vec((16, 1), hpdf.to_vec()).unwrap();
@@ -49,9 +49,9 @@ fn reflect_index(i: isize, n: isize) -> isize {
 }
 
 /// Symmetric pad a 2D array
-fn pad_symmetric(input: &Array2<f64>, pad_v: usize, pad_h: usize) -> Array2<f64> {
+fn pad_symmetric(input: &Array2<f32>, pad_v: usize, pad_h: usize) -> Array2<f32> {
     let (h, w) = input.dim();
-    let mut output = Array2::<f64>::zeros((h + 2*pad_v, w + 2*pad_h));
+    let mut output = Array2::<f32>::zeros((h + 2*pad_v, w + 2*pad_h));
 
     for i in 0..output.nrows() {
         for j in 0..output.ncols() {
@@ -64,18 +64,18 @@ fn pad_symmetric(input: &Array2<f64>, pad_v: usize, pad_h: usize) -> Array2<f64>
 }
 
 /// 2D convolution with symmetric padding and mode='same'
-fn convolve2d(input: &Array2<f64>, kernel: &Array2<f64>) -> Array2<f64> {
+fn convolve2d(input: &Array2<f32>, kernel: &Array2<f32>) -> Array2<f32> {
     let (h, w) = input.dim();
     let (kh, kw) = kernel.dim();
     let pad_h = kh / 2;
     let pad_w = kw / 2;
     let pad = pad_h.max(pad_w);
     let input_pad = pad_symmetric(input, pad_h, pad_w);
 
-    let mut output = Array2::<f64>::zeros((h, w));
+    let mut output = Array2::<f32>::zeros((h, w));
     for i in 0..h {
         for j in 0..w {
-            let mut sum = 0.0f64;
+            let mut sum = 0.0f32;
             for u in 0..kh {
                 for v in 0..kw {
                     let x = i + u;
@@ -92,13 +92,13 @@ fn convolve2d(input: &Array2<f64>, kernel: &Array2<f64>) -> Array2<f64> {
 
 
 
-fn convolve1d_horizontal(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
+fn convolve1d_horizontal(input: &Array2<f32>, kernel: &[f32]) -> Array2<f32> {
     let (h, w) = input.dim();
     let k = kernel.len();
     let pad = k / 2;
     let input_pad = pad_symmetric(input, 0, pad);
 
-    let mut out = Array2::<f64>::zeros((h, w));
+    let mut out = Array2::<f32>::zeros((h, w));
 
     for i in 0..h {
         for j in 0..w {
@@ -113,7 +113,7 @@ fn convolve1d_horizontal(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
     out
 }
 
-fn convolve1d_vertical(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
+fn convolve1d_vertical(input: &Array2<f32>, kernel: &[f32]) -> Array2<f32> {
     // transpose the input
     let input_t = input.t();
     let mut tmp = convolve1d_horizontal(&input_t.to_owned(), kernel);
@@ -135,10 +135,10 @@ fn convolve1d_vertical(input: &Array2<f64>, kernel: &[f64]) -> Array2<f64> {
 // #[pyo3(signature = (x0))]
 #[pyfunction]
 #[pyo3(signature = (x0, p = -1.0))]
-fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f64)
-    -> PyResult<Py<PyArray2<f64>>> {
+fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f32)
+    -> PyResult<Py<PyArray2<f32>>> {
 
-    let input = x0.as_array().mapv(|v| v as f64);
+    let input = x0.as_array().mapv(|v| v as f32);
     let (h, w) = input.dim();
     let mut x0_pad = pad_symmetric(&input, 16 as usize, 16 as usize);
 
@@ -191,14 +191,14 @@ fn compute_cost<'py>(py: Python<'py>, x0: PyReadonlyArray2<'py, u8>, p: f64)
         xi.push(x_crop);
     }
 
-    // convert xi Vec<Array2<f64>> into a single Array3<f64> of shape (3, h, w)
+    // convert xi Vec<Array2<f32>> into a single Array3<f32> of shape (3, h, w)
     let xi_3d = Array3::from_shape_vec(
         (3, h, w),
         xi.into_iter().flat_map(|arr| arr.into_raw_vec()).collect()
     ).unwrap();
 
     // compute sum over channels of xi_i^p
-    let rho = xi_3d.mapv(|v| v.max(f64::EPSILON)).mapv(|v| v.powf(p)).sum_axis(Axis(0)).mapv(|v| v.powf(-1.0f64 / p));
+    let rho = xi_3d.mapv(|v| v.max(f32::EPSILON)).mapv(|v| v.powf(p)).sum_axis(Axis(0)).mapv(|v| v.powf(-1.0f32 / p));
     Ok(PyArray2::from_owned_array(py, rho).into())
 }