|
| 1 | +#------------------------------------------------------------- |
| 2 | +# |
| 3 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 4 | +# or more contributor license agreements. See the NOTICE file |
| 5 | +# distributed with this work for additional information |
| 6 | +# regarding copyright ownership. The ASF licenses this file |
| 7 | +# to you under the Apache License, Version 2.0 (the |
| 8 | +# "License"); you may not use this file except in compliance |
| 9 | +# with the License. You may obtain a copy of the License at |
| 10 | +# |
| 11 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | +# |
| 13 | +# Unless required by applicable law or agreed to in writing, |
| 14 | +# software distributed under the License is distributed on an |
| 15 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 16 | +# KIND, either express or implied. See the License for the |
| 17 | +# specific language governing permissions and limitations |
| 18 | +# under the License. |
| 19 | +# |
| 20 | +# Modifications 2024 The DAPHNE Consortium. |
| 21 | +# |
| 22 | +#------------------------------------------------------------- |
| 23 | + |
| 24 | +# rdpLine: Implementation of the Ramer-Douglas-Peucker algorithm |
| 25 | +# See https://martinfleischmann.net/line-simplification-algorithms/ |
| 26 | +# |
| 27 | +# INPUT: |
| 28 | +# ------------------------------------------------------------------------------ |
| 29 | +# x_data Single column matrix with x values |
| 30 | +# y_data Single column matrix with y values |
| 31 | +# max_points Max points to keep after reduction |
| 32 | +# tolerance Width of the band around the auxiliary line |
| 33 | +# ------------------------------------------------------------------------------ |
| 34 | +# |
| 35 | +# OUTPUT: |
| 36 | +# ------------------------------------------------------------------------------ |
| 37 | +# reduced_idxs Indexes of the reduced points |
| 38 | +# ------------------------------------------------------------------------------ |
| 39 | + |
def crossProduct2R(lhs: matrix<f64>, rhs: matrix<f64>) -> matrix<f64> {
    // 2D cross product (z component only) of one vector with many vectors.
    //
    // lhs: 1x2 matrix holding a single vector (x, y)
    // rhs: nx2 matrix holding one vector (x, y) per row
    //
    // Returns an nx1 matrix whose i-th entry is
    //   lhs.x * rhs[i].y - lhs.y * rhs[i].x
    lhs_x = lhs[0, 0];
    lhs_y = lhs[0, 1];
    rhs_x = rhs[, 0];
    rhs_y = rhs[, 1];
    return rhs_y * lhs_x - rhs_x * lhs_y;
}
| 47 | + |
| 48 | + |
def frobeniusNormR2(vec: matrix<f64>) -> matrix<f64> {
    // Euclidean length of a single 2D vector.
    //
    // vec: 1x2 matrix (x, y)
    //
    // Returns the length as a 1x1 matrix: sqrt(x^2 + y^2)
    x_squared = pow(vec[0, 0], 2);
    y_squared = pow(vec[0, 1], 2);
    return sqrt(x_squared + y_squared);
}
| 53 | + |
| 54 | + |
def normalDistance(points: matrix<f64>) -> matrix<f64> {
    // Distance of every interior point to the auxiliary line (chord) that
    // connects the first and the last point.
    //
    // "Normal distance" is the length of the perpendicular from a point p3
    // onto the line through p1 (first point) and p2 (last point):
    //   abs(cross(p2 - p1, p1 - p3)) / norm(p2 - p1)
    //
    // If p1 and p2 coincide (e.g. a closed polyline) the chord has length 0
    // and the formula above would evaluate to 0/0. In that case the plain
    // Euclidean distance to p1 is returned instead.
    //
    // points: (num_points x 2) matrix of (x, y) rows, num_points >= 3
    //
    // Returns a ((num_points - 2) x 1) matrix with one distance per interior
    // point; entry i belongs to points[i + 1, ].

    num_points = nrow(points);
    first_point = points[0, ];

    // vector from the first to the last point (p2 - p1)
    chord = points[(num_points - 1), ] - first_point;
    // p1 - p3 for every interior point p3
    offsets = (points[1:(num_points - 1), ] - first_point) * -1;

    chord_norm = frobeniusNormR2(chord);
    chord_length = as.scalar<f64>(chord_norm);

    // regular case: perpendicular distance to the chord
    dist_norm = abs(crossProduct2R(chord, offsets)) / chord_norm;

    if (chord_length == 0.0) {
        // degenerate chord: fall back to the distance from the shared end point
        offsets_squared = offsets * offsets;
        dist_norm = sqrt(offsets_squared[, 0] + offsets_squared[, 1]);
    }
    return dist_norm;
}
| 75 | + |
| 76 | + |
def rdpLine(x_data: matrix<f64>, y_data: matrix<f64>, max_points: si64, tolerance: f64) {
    // Iterative Ramer-Douglas-Peucker line simplification.
    //
    // Starting with the single slice [first point, last point], the slice
    // containing the point with the largest normal distance to its chord is
    // split at that point. This repeats until either the largest distance of
    // all slices is below `tolerance` or `max_points` points have been selected.
    //
    // x_data, y_data: single-column matrices with the same number of rows
    // max_points:     upper bound for the number of returned indexes
    //                 (values below 2 behave like 2: both end points are kept)
    // tolerance:      distance threshold, measured on the min-max normalized data
    //
    // Returns a single-column matrix with the selected row indexes in
    // ascending order; it always contains 0 and nrow(x_data) - 1.

    // Min-max normalize both axes. A constant column has a range of 0; it is
    // divided by 1 instead so that it maps to all zeros rather than NaN.
    x_min = aggMin(x_data);
    x_range = aggMax(x_data) - x_min;
    if (x_range == 0.0) {
        x_range = 1.0;
    }
    y_min = aggMin(y_data);
    y_range = aggMax(y_data) - y_min;
    if (y_range == 0.0) {
        y_range = 1.0;
    }
    x_data_norm = (x_data - x_min) / x_range;
    y_data_norm = (y_data - y_min) / y_range;

    // compose matrix of (x, y) rows
    points = cbind(x_data_norm, y_data_norm);

    // max_points points correspond to max_points - 1 slices. Distances are only
    // needed for one slice less, because the termination criterion is fulfilled
    // as soon as the last slice has been created; hence -2.
    // Clamped at 0 so that the slice matrix below always has at least two rows.
    state_arrays_size = max_points - 2;
    if (state_arrays_size < 0) {
        state_arrays_size = 0;
    }

    // Each row of `slices` is [start index, end index]. Unused rows are
    // pre-filled with last_idx, so the row following the last real slice acts
    // as a dummy slice that contributes the last point to column 0.
    last_idx = nrow(x_data_norm) - 1;
    slices = fill(last_idx, state_arrays_size + 2, 2);
    slices[0, :] = transpose(as.matrix([0, last_idx]));

    if (max_points <= 2)
        // nothing to refine: column 0 is exactly [0; last_idx], i.e. the same
        // single-column shape as the regular return value below
        return slices[, 0];

    num_slices = 1;
    new_slice_one = 0;
    new_slice_two = -1;

    // per-slice state: largest normal distance and absolute row index of that point;
    // -1 marks "no interior point" and can never win the max
    max_distance_of_all_slices = fill(-1.0, state_arrays_size, 1);
    abs_arg_max_distance_of_all_slices = as.matrix<si64>(fill(-1, state_arrays_size, 1));

    to_terminate = false;
    while (to_terminate == false) {
        // 1st new slice (the slice that was shortened by the last split)
        slice_start = as.scalar<si64>(slices[new_slice_one,0]);
        slice_end = as.scalar<si64>(slices[new_slice_one,1]);
        if ((slice_end - slice_start) > 1){ // adjacent points have no points in between to measure
            slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
            // max/argmax normal distance of the current slice
            slice_dist_max = aggMax(slice_dist_norm);
            max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_max);
            // absolute index: add slice_start, +1 because normalDistance skips the first point of the slice
            slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
            abs_arg_max_distance_of_all_slices[new_slice_one, 0] = as.matrix(slice_dist_absoluteArgMax);
        }
        else {
            // overwrite the stale distance of the slice before it was shortened
            max_distance_of_all_slices[new_slice_one, 0] = as.matrix([-1.0]);
        }

        // 2nd new slice (the slice appended by the last split; none in the first iteration)
        if (num_slices > 1){
            slice_start = as.scalar<si64>(slices[new_slice_two,0]);
            slice_end = as.scalar<si64>(slices[new_slice_two,1]);
            if ((slice_end - slice_start) > 1){ // adjacent points have no points in between to measure
                slice_dist_norm = normalDistance(points[slice_start:(slice_end+1),]);
                // max/argmax normal distance of the current slice
                slice_dist_max = aggMax(slice_dist_norm);
                max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_max);
                // absolute index: add slice_start, +1 because normalDistance skips the first point of the slice
                slice_dist_absoluteArgMax = idxMax(slice_dist_norm, 1) + slice_start + 1;
                abs_arg_max_distance_of_all_slices[new_slice_two, 0] = as.matrix(slice_dist_absoluteArgMax);
            }
            // otherwise the entry keeps its initial -1.0, this row was never written before
        }
        max_over_max_distance_of_all_slices = aggMax(max_distance_of_all_slices[:num_slices,0]);

        // Stop if every slice is within tolerance. A negative maximum means that no
        // slice has an interior point left, so there is nothing to split regardless
        // of the tolerance.
        if (max_over_max_distance_of_all_slices < 0.0 || max_over_max_distance_of_all_slices < tolerance) {
            to_terminate = true;
        }
        else {
            // split the slice with the largest distance at its farthest point
            argMax_over_max_distance_of_all_slices = as.scalar<si64>(idxMax(max_distance_of_all_slices[:num_slices,0], 1));
            idx_to_be_added = abs_arg_max_distance_of_all_slices[argMax_over_max_distance_of_all_slices,];
            discarded_slice_end = slices[argMax_over_max_distance_of_all_slices, 1];
            slices[argMax_over_max_distance_of_all_slices, 1] = idx_to_be_added;
            slices[num_slices, 0] = idx_to_be_added;
            slices[num_slices, 1] = discarded_slice_end;
            new_slice_one = argMax_over_max_distance_of_all_slices;
            new_slice_two = num_slices;
            num_slices = num_slices + 1;
        }
        // num_slices slices describe num_slices + 1 points
        if ((num_slices+1) == max_points) {
            to_terminate = true;
        }
    }
    // slice starts plus the dummy row (last_idx), sorted ascending
    return order(slices[:(num_slices+1),0], 0, true, false);
}
0 commit comments