Skip to content

Commit 96df7aa

Browse files
authored
Line simplification algorithms (#952)
* add rdp algorithm * add Visvalingam-Whyatt algorithm
1 parent bcaa8c7 commit 96df7aa

File tree

2 files changed

+301
-0
lines changed

2 files changed

+301
-0
lines changed
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#-------------------------------------------------------------
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
# Modifications 2024 The DAPHNE Consortium.
21+
#
22+
#-------------------------------------------------------------
23+
24+
# rdpLine: Implementation of the Ramer-Douglas-Peucker algorithm
25+
# See https://martinfleischmann.net/line-simplification-algorithms/
26+
#
27+
# INPUT:
28+
# ------------------------------------------------------------------------------
29+
# x_data Single column matrix with x values
30+
# y_data Single column matrix with y values
31+
# max_points Max points to keep after reduction
32+
# tolerance Width of the band around the auxiliary line
33+
# ------------------------------------------------------------------------------
34+
#
35+
# OUTPUT:
36+
# ------------------------------------------------------------------------------
37+
# reduced_idxs Indexes of the reduced points
38+
# ------------------------------------------------------------------------------
39+
40+
def crossProduct2R(lhs: matrix<f64>, rhs: matrix<f64>) -> matrix<f64> {
41+
// specific implementation of a cross prod in R2
42+
// calculates the cross product for several R2 vectors at once
43+
// expects lhs matrix with dimensions 1x2 and rhs matrix with dim nx2
44+
z = rhs[,1] * lhs[0,0] - rhs[,0] * lhs[0,1];
45+
return z;
46+
}
47+
48+
49+
def frobeniusNormR2(vec: matrix<f64>) -> matrix<f64> {
50+
// calcs norm for 1x2 matrix (vector)
51+
return sqrt(pow(vec[0,0], 2) + pow(vec[0,1], 2));
52+
}
53+
54+
55+
def normalDistance(points: matrix<f64>) -> matrix<f64> {
56+
// calculates the max distance of all points to the line between the first and the last point
57+
// normal distance means distance of point to line where distance vector has rect angle to aux line
58+
// line between p1 and p2; normal distance from that line to p3
59+
// uses formular: abs(cross(p2-p1, p1-p3)) / norm(p2-p1)
60+
// expects input shape: num_points X 2
61+
// returns output shape: (num_points - 2) X 1
62+
63+
line = points[(nrow(points) - 1),] - points[0,]; // vector from 1st point to last point
64+
vector_diffs = (points[1:(nrow(points) - 1),] - points[0,]) * -1; // p1-p3 for all p3 between p1 and p2
65+
// for(i in 0:(nrow(points)-1)) { TODO not possible
66+
67+
cross = crossProduct2R(line, vector_diffs); // calc cross product for all p3
68+
abs_cross = abs(cross);
69+
70+
71+
line_frobenius_norm = frobeniusNormR2(line);
72+
dist_norm = abs_cross / line_frobenius_norm; // abs_cross/norm(p2-p1)
73+
return dist_norm;
74+
}
75+
76+
77+
def rdpLine(x_data: matrix<f64>, y_data: matrix<f64>, max_points: si64, tolerance: f64) {
78+
79+
// normalize data
80+
x_data_norm = (x_data - aggMin(x_data)) / (aggMax(x_data) - aggMin(x_data));
81+
y_data_norm = (y_data - aggMin(y_data)) / (aggMax(y_data) - aggMin(y_data));
82+
83+
// compose matrix with (x,y) doubles
84+
points = cbind(x_data_norm, y_data_norm);
85+
86+
// slices -> max points -1
87+
// values need only calculated for one slice less since then termination criterion is fullfilled
88+
// hence -2
89+
state_arrays_size = max_points-2;
90+
91+
last_idx = nrow(x_data_norm) - 1;
92+
slices = fill(last_idx, state_arrays_size+2, 2); // having one dummy slice in the end for the last point
93+
slices[0, :] = transpose(as.matrix([0, last_idx]));
94+
95+
if (max_points <= 2)
96+
return slices;
97+
98+
num_slices = 1;
99+
new_slice_one = 0;
100+
new_slice_two = -1;
101+
102+
max_distance_of_all_slices = fill(-1.0, state_arrays_size, 1); // instanciate empty matrix, neg. value ensures that this wont be a max
103+
abs_arg_max_distance_of_all_slices = as.matrix<si64>(fill(inf, state_arrays_size, 1)); // same trick like line above, value will never be accessed
104+
105+
to_terminate = false;
106+
while (to_terminate == false) {
107+
// 1st new slice
108+
slice_start = as.scalar<si64>(slices[new_slice_one,0]);
109+
slice_end = as.scalar<si64>(slices[new_slice_one,1]);
110+
if ((slice_end - slice_start) > 1){ // if points are right next to each other there is no distance to calc of points in between
111+
slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
112+
// calculate max/argmax normal distance of current slice
113+
slice_dist_max = aggMax(slice_dist_norm);
114+
max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_max);
115+
// calc abs. idx of slice distance max: add slice_start to get absolute idx, +1 cause normalDistance returns withidx -1
116+
slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
117+
abs_arg_max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_absoluteArgMax);
118+
}
119+
else {
120+
max_distance_of_all_slices[new_slice_one, 0] = as.matrix([-1.0]);
121+
}
122+
123+
// 2nd new slice
124+
if (num_slices > 1){
125+
slice_start = as.scalar<si64>(slices[new_slice_two,0]);
126+
slice_end = as.scalar<si64>(slices[new_slice_two,1]);
127+
if ((slice_end - slice_start) > 1){ // if points are right next to each other there is no distance to calc of points in between
128+
slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
129+
// calculate max/argmax normal distance of current slice
130+
slice_dist_max = aggMax(slice_dist_norm);
131+
max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_max);
132+
// calc abs. idx of slice distance max: add slice_start to get absolute idx, +1 cause normalDistance returns withidx -1
133+
slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
134+
abs_arg_max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_absoluteArgMax);
135+
}
136+
}
137+
max_over_max_distance_of_all_slices = aggMax(max_distance_of_all_slices[:num_slices,0]);
138+
139+
if (max_over_max_distance_of_all_slices < tolerance) { // if the max dist of every slice is already below tolerance, abort
140+
to_terminate = true;
141+
}
142+
else {
143+
argMax_over_max_distance_of_all_slices = as.scalar<si64>(idxMax(max_distance_of_all_slices[:num_slices,0], 1));
144+
idx_to_be_added = abs_arg_max_distance_of_all_slices[argMax_over_max_distance_of_all_slices,];
145+
discarded_slice_end = slices[argMax_over_max_distance_of_all_slices, 1];
146+
slices[argMax_over_max_distance_of_all_slices, 1] = idx_to_be_added;
147+
slices[num_slices, 0] = idx_to_be_added;
148+
slices[num_slices, 1] = discarded_slice_end;
149+
new_slice_one = argMax_over_max_distance_of_all_slices;
150+
new_slice_two = num_slices;
151+
num_slices = num_slices + 1;
152+
}
153+
if ((num_slices+1) == max_points) {
154+
to_terminate = true;
155+
}
156+
}
157+
return order(slices[:(num_slices+1),0], 0, true, false);
158+
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#-------------------------------------------------------------
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
# Modifications 2024 The DAPHNE Consortium.
21+
#
22+
#-------------------------------------------------------------
23+
24+
# vwLine: Implementation of the Visvalingam-Whyatt algorithm
25+
# See https://martinfleischmann.net/line-simplification-algorithms/
26+
#
27+
# INPUT:
28+
# ------------------------------------------------------------------------------
29+
# x_data Single column matrix with x values
30+
# y_data Single column matrix with y values
31+
# min_points Min points to keep after reduction, must be >=2
32+
# max_points Max points to keep after reduction
33+
# tolerance Area of the triangle spanned by three points,
34+
# if smaller than tolerance, the middle point gets dropped
35+
# ------------------------------------------------------------------------------
36+
#
37+
# OUTPUT:
38+
# ------------------------------------------------------------------------------
39+
# reduced_idxs Indexes of the reduced points
40+
# ------------------------------------------------------------------------------
41+
42+
def calcTriangles(x_data: matrix<f64>, y_data: matrix<f64>) -> matrix<f64> {
43+
44+
// compose matrix with three neighboring points (triple) in a row
45+
points = cbind(x_data, y_data);
46+
lhsIdx = nrow(points) - 2;
47+
mhsIdx = nrow(points) - 1; // middle index
48+
triples = cbind(points[:lhsIdx,], points[1:mhsIdx,]);
49+
triples = cbind(triples, points[2:,]);
50+
51+
// calculate every area of the triangles which is spanned by every triple
52+
triangle_areas = triples[,0] * (triples[,3] - triples[,5]) + triples[,2] * (triples[,5] - triples[,1]) + triples[,4] * (triples[,1] - triples[,3]);
53+
triangle_areas = abs(triangle_areas * 0.5);
54+
55+
return triangle_areas;
56+
}
57+
58+
59+
def toTerminate(significance: matrix, min_points: si64, max_points: si64, tolerance: f64, tolerance_reached_before: bool) -> bool, bool {
60+
to_terminate = false;
61+
tolerance_reached = false;
62+
63+
min_points_exceeded = (nrow(significance) + 2) <= min_points; // +2 because of first and last point
64+
if (min_points_exceeded) {
65+
//print("TERM VW min points reached");
66+
to_terminate = true;
67+
}
68+
69+
above_max_points = ((nrow(significance)) + 2) > max_points; // +2 because of first and last point
70+
71+
tolerance_undershot = aggMin(significance) > tolerance;
72+
if (tolerance_undershot) {
73+
if (above_max_points) {
74+
//if (tolerance_reached_before == 0)
75+
//print("Tolerance reached, continue till max_points is satisfied");
76+
}
77+
else {
78+
//print("TERM VW tolerance reached and max points satisfied");
79+
to_terminate = true;
80+
}
81+
tolerance_reached = true;
82+
}
83+
84+
return to_terminate, tolerance_reached;
85+
}
86+
87+
88+
def vwLine(x_data: matrix<f64>, y_data: matrix<f64>, min_points: si64, max_points: si64, tolerance: f64) -> matrix<si64> {
89+
90+
significance = calcTriangles(x_data, y_data);
91+
92+
to_terminate, tolerance_reached_before = toTerminate(significance, min_points, max_points, tolerance, false);
93+
dropped_idxs = as.matrix<si64>([inf]); // idxs which are dropped, empty right now, inf has to be removed later
94+
reduced_idxs = seq(0, nrow(y_data) - 1, 1); // idxs which are left after reduction steps
95+
while(to_terminate == 0) {
96+
97+
argmin_significance = idxMin(significance, 1); // get idx of smallest triangle area
98+
99+
dropped_point = reduced_idxs[argmin_significance + 1,]; // +1 because first triangle/significance represents 2nd point
100+
dropped_idxs = rbind(dropped_idxs, dropped_point); // TODO is not used, could be removed
101+
102+
// argmin_asScalar = argmin_significance[0,0] // WISH
103+
argmin_asScalar = sum(as.matrix<si64>(argmin_significance)[0,0]); // HACK cast to unsigned that sum works, use sum() to get scalar
104+
105+
reduced_idxs = rbind(reduced_idxs[:(argmin_asScalar + 1),], reduced_idxs[(argmin_asScalar + 2):,]); // drop min significance
106+
reduced_x = x_data[reduced_idxs,];
107+
reduced_y = y_data[reduced_idxs,];
108+
109+
// TODO recalc triangles around dropped point
110+
//if (argmin_asScalar == 0) { // if first triangle / second point of current reduced points gets dropped
111+
//}
112+
//else if (argmin_asScalar == (nrow(significance) - 1)) { // if last triangle / point before last point of current reduced points gets dropped
113+
//}
114+
//else { // if a point in between with two neighbors gets dropped
115+
//print(x_data);
116+
//new_x_left = x_data[(dropped_point_asScalar - 2) : dropped_point_asScalar,];
117+
//new_x_right = x_data[(dropped_point_asScalar + 1), (dropped_point_asScalar + 2)];
118+
//current_reduced_x = rbind(new_x_left, new_x_right);
119+
//print(current_reduced_x);
120+
//new_triangles = calcTriangles(x_data[,]);
121+
//}
122+
123+
significance = calcTriangles(reduced_x, reduced_y);
124+
125+
to_terminate, tolerance_reached = toTerminate(significance, min_points, max_points, tolerance, tolerance_reached_before);
126+
if (tolerance_reached)
127+
tolerance_reached_before = true;
128+
}
129+
130+
return reduced_idxs;
131+
}
132+
133+
134+
def cumTrapz(x_data: matrix<f64>, y_data: matrix<f64>) -> f64 {
135+
// uses trapezoidal rule to approximate the area under curve
136+
// expects both args wit same dims mx1
137+
138+
//TODO is this algo correct OR use abs around x_data subtract ???
139+
140+
max_idx = nrow(x_data) - 1;
141+
trapz = (x_data[1:,] - x_data[:max_idx,]) * 0.5 * (y_data[:max_idx,] + y_data[1:,]); // trapezoidal rule
142+
return sum(trapz);
143+
}

0 commit comments

Comments
 (0)