Line simplification algorithms (#952)

m-birke · web-flow · commit 96df7aadd313 · 2025-05-14T12:53:49.000+02:00
* add rdp algorithm

* add Visvalingam-Whyatt algorithm
diff --git a/scripts/algorithms/RamerDouglasPeucker.daph b/scripts/algorithms/RamerDouglasPeucker.daph
@@ -0,0 +1,158 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Modifications 2024 The DAPHNE Consortium.
+#
+#-------------------------------------------------------------
+
+# rdpLine: Implementation of the Ramer-Douglas-Peucker algorithm
+# See https://martinfleischmann.net/line-simplification-algorithms/
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# x_data          Single column matrix with x values
+# y_data          Single column matrix with y values
+# max_points      Max points to keep after reduction
+# tolerance       Max normal distance to the auxiliary line, measured on the min-max normalized data
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------
+# reduced_idxs    Indexes of the reduced points
+# ------------------------------------------------------------------------------
+
+def crossProduct2R(lhs: matrix<f64>, rhs: matrix<f64>) -> matrix<f64> {
+    // cross product in R2 of the 1x2 vector lhs with every row of the nx2 matrix rhs at once
+    // i.e. lhs_x * rhs_y - lhs_y * rhs_x, evaluated per row of rhs; returns one entry per row of rhs
+    x_times_y = rhs[,1] * lhs[0,0];
+    y_times_x = rhs[,0] * lhs[0,1];
+    return x_times_y - y_times_x;
+}
+
+
+def frobeniusNormR2(vec: matrix<f64>) -> matrix<f64> {
+    squared_length = pow(vec[0,0], 2) + pow(vec[0,1], 2); // x^2 + y^2 of the 1x2 matrix (vector)
+    return sqrt(squared_length); // norm of the vector
+}
+
+
+def normalDistance(points: matrix<f64>) -> matrix<f64> {
+    // Normal (perpendicular) distance of every interior point to the line from the first to the last point.
+    // For line endpoints p1, p2 and an interior point p3: abs(cross(p2-p1, p1-p3)) / norm(p2-p1)
+    // points:  num_points X 2 matrix, one (x, y) pair per row
+    // returns: (num_points - 2) X 1 matrix, row i belongs to input row i + 1 (taking the max is left to the caller)
+    // NOTE(review): if first and last point coincide the line has length 0 and the division yields inf/NaN
+
+    last_row = nrow(points) - 1;
+    first_point = points[0,];
+    chord = points[last_row,] - first_point; // p2 - p1
+
+    // p1 - p3 for all interior rows, written as -(p3 - p1); the upper slice bound is exclusive
+    to_first_point = (points[1:last_row,] - first_point) * -1;
+
+    // abs value of the cross product for all p3
+    abs_cross = abs(crossProduct2R(chord, to_first_point));
+
+    // abs_cross / norm(p2-p1)
+    return abs_cross / frobeniusNormR2(chord);
+}
+
+
+def rdpLine(x_data: matrix<f64>, y_data: matrix<f64>, max_points: si64, tolerance: f64) {
+    // Ramer-Douglas-Peucker reduction; returns one column with the ascending indexes of the kept points
+    // min-max normalize both axes, hence tolerance is compared against distances in normalized coordinates
+    x_data_norm = (x_data - aggMin(x_data)) / (aggMax(x_data) - aggMin(x_data));
+    y_data_norm = (y_data - aggMin(y_data)) / (aggMax(y_data) - aggMin(y_data));
+
+    // compose matrix with one (x,y) pair per row
+    points = cbind(x_data_norm, y_data_norm);
+
+    // keeping max_points points yields at most max_points-1 slices (index ranges between two kept points)
+    // distances are needed for one slice less since the termination criterion is fulfilled by then
+    // hence -2
+    state_arrays_size = max_points-2;
+
+    last_idx = nrow(x_data_norm) - 1;
+    slices = fill(last_idx, state_arrays_size+2, 2); // row = [start idx, end idx]; having one dummy slice in the end for the last point
+    slices[0, :] = transpose(as.matrix([0, last_idx]));
+
+    if (max_points <= 2)
+        return slices[,0]; // only the start column, i.e. the same single column layout as the regular result below
+
+    num_slices = 1;
+    new_slice_one = 0;
+    new_slice_two = -1;
+
+    max_distance_of_all_slices = fill(-1.0, state_arrays_size, 1); // neg. value ensures that an unset entry won't be a max
+    abs_arg_max_distance_of_all_slices = fill(-1, state_arrays_size, 1); // integer placeholder, avoids casting inf to si64
+
+    to_terminate = false;
+    while (to_terminate == false) {
+        // 1st new slice
+        slice_start = as.scalar<si64>(slices[new_slice_one,0]);
+        slice_end = as.scalar<si64>(slices[new_slice_one,1]);
+        if ((slice_end - slice_start) > 1){ // if points are right next to each other there is no distance to calc of points in between
+            slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
+            // calculate max/argmax normal distance of current slice
+            slice_dist_max = aggMax(slice_dist_norm);
+            max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_max);
+            // calc abs. idx of slice distance max: add slice_start to get absolute idx, +1 because normalDistance skips the first point of the slice
+            slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
+            abs_arg_max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_absoluteArgMax);
+        }
+        else {
+            max_distance_of_all_slices[new_slice_one, 0] = as.matrix([-1.0]);
+        }
+
+        // 2nd new slice, exists only after the first split; its state entry is still the -1.0 placeholder
+        if (num_slices > 1){
+            slice_start = as.scalar<si64>(slices[new_slice_two,0]);
+            slice_end = as.scalar<si64>(slices[new_slice_two,1]);
+            if ((slice_end - slice_start) > 1){ // if points are right next to each other there is no distance to calc of points in between
+                slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
+                // calculate max/argmax normal distance of current slice
+                slice_dist_max = aggMax(slice_dist_norm);
+                max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_max);
+                // calc abs. idx of slice distance max: add slice_start to get absolute idx, +1 because normalDistance skips the first point of the slice
+                slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
+                abs_arg_max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_absoluteArgMax);
+            }
+        }
+        max_over_max_distance_of_all_slices = aggMax(max_distance_of_all_slices[:num_slices,0]);
+
+        if (max_over_max_distance_of_all_slices < tolerance) { // if the max dist of every slice is already below tolerance, abort
+            to_terminate = true;
+        }
+        else {
+            // split the slice with the largest distance at its most distant point: [start, end] -> [start, idx] and [idx, end]
+            argMax_over_max_distance_of_all_slices = as.scalar<si64>(idxMax(max_distance_of_all_slices[:num_slices,0], 1));
+            idx_to_be_added = abs_arg_max_distance_of_all_slices[argMax_over_max_distance_of_all_slices,];
+            discarded_slice_end = slices[argMax_over_max_distance_of_all_slices, 1];
+            slices[argMax_over_max_distance_of_all_slices, 1] = idx_to_be_added;
+            slices[num_slices, 0] = idx_to_be_added;
+            slices[num_slices, 1] = discarded_slice_end;
+            new_slice_one = argMax_over_max_distance_of_all_slices;
+            new_slice_two = num_slices;
+            num_slices = num_slices + 1;
+        }
+        if ((num_slices+1) == max_points) {
+            to_terminate = true;
+        }
+    }
+    return order(slices[:(num_slices+1),0], 0, true, false);
+}
diff --git a/scripts/algorithms/VisvalingamWhyatt.daph b/scripts/algorithms/VisvalingamWhyatt.daph
@@ -0,0 +1,143 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Modifications 2024 The DAPHNE Consortium.
+#
+#-------------------------------------------------------------
+
+# vwLine: Implementation of the Visvalingam-Whyatt algorithm
+# See https://martinfleischmann.net/line-simplification-algorithms/
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# x_data          Single column matrix with x values
+# y_data          Single column matrix with y values
+# min_points      Min points to keep after reduction, must be >=2
+# max_points      Max points to keep after reduction
+# tolerance       Area of the triangle spanned by three points,
+#                 if smaller than tolerance, the middle point gets dropped
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------
+# reduced_idxs    Indexes of the reduced points
+# ------------------------------------------------------------------------------
+
+def calcTriangles(x_data: matrix<f64>, y_data: matrix<f64>) -> matrix<f64> {
+    // Area of the triangle spanned by each point and its two direct neighbors.
+    // x_data, y_data: single column matrices with n rows each (assumed, as documented for vwLine)
+    // returns: (n - 2) X 1 matrix, row i holds the area of the triangle around point i + 1
+
+    // three row windows over the points, shifted by one row each; upper slice bounds are exclusive
+    points = cbind(x_data, y_data);
+    num_triangles = nrow(points) - 2;
+    prev_pts = points[:num_triangles,];
+    mid_pts = points[1:(num_triangles + 1),];
+    next_pts = points[2:,];
+
+    doubled_areas = prev_pts[,0] * (mid_pts[,1] - next_pts[,1]) + mid_pts[,0] * (next_pts[,1] - prev_pts[,1]) + next_pts[,0] * (prev_pts[,1] - mid_pts[,1]);
+    return abs(doubled_areas * 0.5);
+}
+
+
+def toTerminate(significance: matrix, min_points: si64, max_points: si64, tolerance: f64, tolerance_reached_before: bool) -> bool, bool {
+    // Evaluates the stop criteria of the Visvalingam-Whyatt reduction for the current triangle areas.
+    // significance:             triangle areas, one per point except the first and the last one
+    // min_points:               terminate as soon as no more than min_points are left
+    // max_points:               a reached tolerance only terminates if no more than max_points are left
+    // tolerance:                counts as reached once the smallest area is larger than tolerance
+    // tolerance_reached_before: not evaluated in here
+    // returns:                  to_terminate, tolerance_reached
+    points_left = nrow(significance) + 2; // +2 because of first and last point
+    to_terminate = false;
+    tolerance_reached = false;
+
+    if (points_left <= min_points) { // lower bound reached, stop regardless of the tolerance
+        to_terminate = true;
+    }
+
+    smallest_area = aggMin(significance);
+    if (smallest_area > tolerance) {
+        tolerance_reached = true;
+        // keep reducing while more than max_points are left, even though the tolerance is reached
+        if (points_left <= max_points) {
+            to_terminate = true;
+        }
+    }
+
+    return to_terminate, tolerance_reached;
+}
+
+
+def vwLine(x_data: matrix<f64>, y_data: matrix<f64>, min_points: si64, max_points: si64, tolerance: f64) -> matrix<si64> {
+    // Visvalingam-Whyatt line simplification: repeatedly drops the point whose triangle with its
+    // two direct neighbors has the smallest area, until toTerminate() signals the end.
+    //
+    // x_data, y_data: single column matrices with the coordinates of the points
+    // min_points:     the reduction stops as soon as no more than min_points are left, must be >=2
+    // max_points:     reaching the tolerance only stops the reduction if no more than max_points are left
+    // tolerance:      triangle area; reached once the smallest remaining area is larger than it
+    // returns:        single column matrix with the idxs (rows of x_data/y_data) of the kept points
+    // NOTE: the data is used as is (no normalization), so tolerance is an area in the units of x_data * y_data
+
+    significance = calcTriangles(x_data, y_data); // one area per interior point
+    reduced_idxs = seq(0, nrow(y_data) - 1, 1); // idxs which are left after reduction steps
+
+    // the inputs may already satisfy the termination criteria, in which case nothing is dropped
+    to_terminate, tolerance_reached_before = toTerminate(significance, min_points, max_points, tolerance, false);
+    while (to_terminate == false) {
+
+        // row of the smallest triangle area as a scalar
+        argmin_asScalar = as.scalar<si64>(idxMin(significance, 1));
+
+        // significance[i] describes the point reduced_idxs[i + 1] because the first point has no triangle,
+        // so the rows 0..i and i+2..end are kept (the upper bound of a slice is exclusive)
+        kept_head = reduced_idxs[:(argmin_asScalar + 1),];
+        kept_tail = reduced_idxs[(argmin_asScalar + 2):,];
+        reduced_idxs = rbind(kept_head, kept_tail); // drop min significance
+        reduced_x = x_data[reduced_idxs,];
+        reduced_y = y_data[reduced_idxs,];
+
+        // TODO recalc only the triangles around the dropped point; so far all areas are recomputed:
+        //  - first triangle dropped: only the new first triangle changes
+        //  - last triangle dropped: only the new last triangle changes
+        //  - otherwise: the triangles of both neighbors change
+        significance = calcTriangles(reduced_x, reduced_y);
+
+        // tolerance_reached_before memorizes whether the tolerance has been reached in any step so far
+        to_terminate, tolerance_reached = toTerminate(significance, min_points, max_points, tolerance, tolerance_reached_before);
+        if (tolerance_reached)
+            tolerance_reached_before = true;
+    }
+
+    return reduced_idxs;
+}
+
+
+
+def cumTrapz(x_data: matrix<f64>, y_data: matrix<f64>) -> f64 {
+    // Approximates the area under the curve (x_data, y_data) with the trapezoidal rule and returns the total.
+    // expects both args with same dims mx1
+    // NOTE: the widths are signed (no abs around the x_data subtract), so sections with decreasing x contribute negatively
+
+    last_row = nrow(x_data) - 1;
+    widths = x_data[1:,] - x_data[:last_row,];
+    height_sums = y_data[:last_row,] + y_data[1:,];
+    return sum(widths * 0.5 * height_sums); // width * mean height, summed over all sections
+}