Skip to content

Commit 2b27833

Browse files
committed
rework continuous frechet distance lower bound and fix simplification nomenclature
1 parent 53d761e commit 2b27833

File tree

10 files changed

+109
-55
lines changed

10 files changed

+109
-55
lines changed

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ By default, Fred will automatically determine the number of threads to use. If y
2525
- returns: `fred.Continuous_Frechet_Result` with members `value`, `time_bounds`: running-time for upper and lower bound, `number_searches`: number of free space diagrams built, `time_searches`: running-time for free spaces
2626

2727
###### continuous Frechet distance config
28-
- approximation error: `fred.set_continuous_frechet_epsilon(epsilon)` with parameter `epsilon`, which defaults to 0.001
29-
- rounding (rounding up to 3 decimals): `fred.set_continuous_frechet_rounding(round)` with parameter `round`, which defaults to true
28+
- approximation error in percent of distance: `fred.set_continuous_frechet_error(double percent)` with parameter `percent`, which defaults to 1
29+
- rounding: `fred.set_continuous_frechet_rounding(round)` with parameter `round`, which defaults to true
3030

3131
#### discrete Fréchet distance
3232
- signature: `fred.discrete_frechet(curve1, curve2)`
@@ -40,19 +40,19 @@ By default, Fred will automatically determine the number of threads to use. If y
4040

4141
All simplifications are vertex-restricted!
4242

43-
#### weak minimum error simplification
43+
#### minimum error simplification
4444
- graph approach from [**Polygonal Approximations of a Curve — Formulations and Algorithms**](https://www.sciencedirect.com/science/article/pii/B9780444704672500114)
45-
- signature: `fred.weak_minimum_error_simplification(fred.Curve, int complexity)`
45+
- signature: `fred.minimum_error_simplification(fred.Curve, int complexity)`
4646
- returns: `fred.Curve`that uses input curves vertices, with `complexity` number of vertices and that has minimum distance to input curve
4747

48-
#### approximate weak minimum link simplification
48+
#### approximate minimum link simplification
4949
- algorithm "FS" from [**Near-Linear Time Approximation Algorithms for Curve Simplification**](https://link.springer.com/article/10.1007/s00453-005-1165-y)
50-
- signature: `fred.approximate_weak_minimum_error_simplification(fred.Curve, double error)`
50+
- signature: `fred.approximate_minimum_link_simplification(fred.Curve, double error)`
5151
- returns: `fred.Curve` that uses input curves vertices, is of small complexity and with distance to input curve at most `error`
5252

53-
#### approximate weak minimum error simplification
54-
- binary search on `fred.approximate_weak_minimum_link_simplification`
55-
- signature: `fred.approximate_weak_minimum_error_simplification(fred.Curve, int complexity)`
53+
#### approximate minimum error simplification
54+
- binary search on `fred.approximate_minimum_link_simplification`
55+
- signature: `fred.approximate_minimum_error_simplification(fred.Curve, int complexity)`
5656
- returns: `fred.Curve`that uses input curves vertices, with `complexity` number of vertices and that has small distance to input curve
5757

5858
### Clustering
@@ -68,7 +68,7 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
6868
- `l`: maximum complexity of the centers
6969
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
7070
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
71-
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
71+
- `fast_simplification`: determines whether to use the minimum error simplification or the faster approximate minimum error simplification, defaults to false
7272
- returns: `fred.Clustering_Result` with mebers
7373
- `value`: objective value
7474
- `time`: running-time
@@ -80,7 +80,7 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
8080
- `k`: number of centers
8181
- `l`: maximum complexity of the centers
8282
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
83-
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
83+
- `fast_simplification`: determines whether to use the minimum error simplification or the faster approximate minimum error simplification, defaults to false
8484
- returns: `fred.Clustering_Result` with mebers
8585
- `value`: objective value
8686
- `time`: running-time
@@ -136,7 +136,7 @@ curve2d2 = fred.Curve(np.array([[1., -1.], [2., -2.], [3., -1.]]), "optional nam
136136
print(curve2d1)
137137

138138
Fred.plot_curve(curve2d1, curve2d2)
139-
Fred.plot_curve(curve2d2, fred.weak_minimum_error_simplification(curve2d2, 2))
139+
Fred.plot_curve(curve2d2, fred.minimum_error_simplification(curve2d2, 2))
140140

141141
print("distance is {}".format(fred.continuous_frechet(curve2d1, curve2d2).value))
142142

include/clustering.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,12 @@ Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_
140140

141141
auto simplify = [&](const curve_number_t i) {
142142
if (fast_simplification) {
143-
auto simplified_curve = Simplification::approximate_weak_minimum_error_simplification(const_cast<Curve&>(in[i]), ell);
143+
auto simplified_curve = Simplification::approximate_minimum_error_simplification(const_cast<Curve&>(in[i]), ell);
144144
simplified_curve.set_name("Simplification of " + in[i].get_name());
145145
return simplified_curve;
146146
} else {
147147
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[i]));
148-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
148+
auto simplified_curve = graph.minimum_error_simplification(ell);
149149
simplified_curve.set_name("Simplification of " + in[i].get_name());
150150
return simplified_curve;
151151
}
@@ -268,7 +268,7 @@ Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in,
268268

269269
for (curve_number_t i = 0; i < in.size(); ++i) {
270270
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[i]));
271-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
271+
auto simplified_curve = graph.minimum_error_simplification(ell);
272272
simplified_curve.set_name("Simplification of " + in[i].get_name());
273273
simplified_in_self[i] = simplified_curve;
274274
}
@@ -327,7 +327,7 @@ Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in
327327

328328
for (curve_number_t i = 0; i < in.size(); ++i) {
329329
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[i]));
330-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
330+
auto simplified_curve = graph.minimum_error_simplification(ell);
331331
simplified_curve.set_name("Simplification of " + in[i].get_name());
332332
simplified_in_self[i] = simplified_curve;
333333
}

include/frechet.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
1818
namespace Frechet {
1919
namespace Continuous {
2020

21-
extern distance_t epsilon;
21+
extern distance_t error;
2222
extern bool round;
2323

2424
struct Distance {
@@ -39,6 +39,7 @@ namespace Continuous {
3939
std::vector<std::vector<Interval>>&, std::vector<std::vector<Interval>>&);
4040

4141
distance_t _greedy_upper_bound(const Curve&, const Curve&);
42+
distance_t _projective_lower_bound(const Curve&, const Curve&);
4243
}
4344
namespace Discrete {
4445

include/point.hpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
1515
#include <cmath>
1616
#include <string>
1717
#include <sstream>
18+
#include <type_traits>
1819

1920
#include <pybind11/pybind11.h>
2021
#include <pybind11/numpy.h>
@@ -92,7 +93,8 @@ class Point : public Coordinates {
9293
return result;
9394
}
9495

95-
inline Point operator*(const distance_t mult) const {
96+
template<typename T>
97+
inline Point operator*(const T mult) const {
9698
Point result = *this;
9799
#pragma omp simd
98100
for (dimensions_t i = 0; i < dimensions(); ++i){
@@ -146,7 +148,20 @@ class Point : public Coordinates {
146148
return std::sqrt(length_sqr());
147149
}
148150

149-
inline Interval intersection_interval(const distance_t distance_sqr, const Point &line_start, const Point &line_end) const {
151+
inline distance_t line_segment_dist_sqr(const Point &p1, const Point &p2) const {
152+
const Vector u = p2 - p1;
153+
parameter_t projection_param = (*this - p1) * u / (u * u);
154+
if (projection_param < parameter_t(0)) projection_param = parameter_t(0);
155+
else if (projection_param > parameter_t(1)) projection_param = parameter_t(1);
156+
const Point projection = p1 + u * projection_param;
157+
return projection.dist_sqr(*this);
158+
}
159+
160+
inline distance_t line_segment_dist(const Point &p1, const Point &p2) const {
161+
return std::sqrt(line_segment_dist(p1, p2));
162+
}
163+
164+
inline Interval ball_intersection_interval(const distance_t distance_sqr, const Point &line_start, const Point &line_end) const {
150165
const Vector u = line_end-line_start, v = *this - line_start;
151166
const parameter_t ulen_sqr = u.length_sqr(), vlen_sqr = v.length_sqr();
152167

include/simplification.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class Subcurve_Shortcut_Graph {
5252
}
5353
}
5454

55-
Curve weak_minimum_error_simplification(const curve_size_t ll) const {
55+
Curve minimum_error_simplification(const curve_size_t ll) const {
5656
if (ll >= curve.complexity()) return curve;
5757

5858
curve_size_t l = ll - 1;
@@ -111,7 +111,7 @@ class Subcurve_Shortcut_Graph {
111111

112112
};
113113

114-
Curve approximate_weak_minimum_link_simplification(const Curve&, const distance_t);
115-
Curve approximate_weak_minimum_error_simplification(const Curve&, const curve_size_t);
114+
Curve approximate_minimum_link_simplification(const Curve&, const distance_t);
115+
Curve approximate_minimum_error_simplification(const Curve&, const curve_size_t);
116116

117117
};

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def build_extension(self, ext):
7474

7575
setup(
7676
name='Fred-Frechet',
77-
version='1.7.7',
77+
version='1.8',
7878
author='Dennis Rohde',
7979
author_email='dennis.rohde@tu-dortmund.de',
8080
description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.',

src/curve.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ Curves Curves::simplify(const curve_size_t l, const bool approx = false) {
6161
Curves result(size(), l, Curves::dimensions());
6262
for (curve_number_t i = 0; i < size(); ++i) {
6363
if (approx) {
64-
Curve simplified_curve = Simplification::approximate_weak_minimum_error_simplification(std::vector<Curve>::operator[](i), l);
64+
Curve simplified_curve = Simplification::approximate_minimum_error_simplification(std::vector<Curve>::operator[](i), l);
6565
simplified_curve.set_name("Simplification of " + std::vector<Curve>::operator[](i).get_name());
6666
result[i] = simplified_curve;
6767
} else {
6868
Simplification::Subcurve_Shortcut_Graph graph(std::vector<Curve>::operator[](i));
69-
Curve simplified_curve = graph.weak_minimum_error_simplification(l);
69+
Curve simplified_curve = graph.minimum_error_simplification(l);
7070
simplified_curve.set_name("Simplification of " + std::vector<Curve>::operator[](i).get_name());
7171
result[i] = simplified_curve;
7272
}

src/frechet.cpp

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace Frechet {
1818

1919
namespace Continuous {
2020

21-
distance_t epsilon = 0.001;
21+
distance_t error = 1;
2222
bool round = true;
2323

2424
std::string Distance::repr() const {
@@ -42,7 +42,7 @@ Distance distance(const Curve &curve1, const Curve &curve2) {
4242
}
4343

4444
auto start = std::chrono::high_resolution_clock::now();
45-
const distance_t lb = std::sqrt(std::max(curve1[0].dist_sqr(curve2[0]), curve1[curve1.complexity()-1].dist_sqr(curve2[curve2.complexity()-1])));
45+
const distance_t lb = _projective_lower_bound(curve1, curve2);
4646
const distance_t ub = _greedy_upper_bound(curve1, curve2);
4747
auto end = std::chrono::high_resolution_clock::now();
4848

@@ -52,7 +52,6 @@ Distance distance(const Curve &curve1, const Curve &curve2) {
5252

5353
auto dist = _distance(curve1, curve2, ub, lb);
5454
dist.time_bounds = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
55-
if (round) dist.value = std::round(dist.value * 1e3) / 1e3;
5655

5756
return dist;
5857
}
@@ -62,9 +61,10 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist
6261
auto start = std::chrono::high_resolution_clock::now();
6362

6463
distance_t split = (ub + lb)/2;
64+
const distance_t p_error = lb > 0 ? lb * error / 100 : std::numeric_limits<distance_t>::epsilon();
6565
std::size_t number_searches = 0;
6666

67-
if (ub - lb > epsilon) {
67+
if (ub - lb > p_error) {
6868
auto infty = std::numeric_limits<parameter_t>::infinity();
6969
std::vector<std::vector<parameter_t>> reachable1(curve1.complexity() - 1, std::vector<parameter_t>(curve2.complexity(), infty));
7070
std::vector<std::vector<parameter_t>> reachable2(curve1.complexity(), std::vector<parameter_t>(curve2.complexity() - 1, infty));
@@ -78,7 +78,7 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist
7878
}
7979

8080
//Binary search over the feasible distances
81-
while (ub - lb > epsilon) {
81+
while (ub - lb > p_error) {
8282
++number_searches;
8383
split = (ub + lb)/2;
8484
auto isLessThan = _less_than_or_equal(split, curve1, curve2, reachable1, reachable2, free_intervals1, free_intervals2);
@@ -95,6 +95,10 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist
9595
}
9696

9797
distance_t value = (ub + lb)/2.;
98+
const distance_t p_error_exp = std::log10(p_error);
99+
const distance_t round_exp = p_error_exp >= 0 ? 0 : static_cast<unsigned int>(-p_error_exp);
100+
const distance_t round_fact = std::pow(10, round_exp);
101+
if (round) value = std::round(value * round_fact) / round_fact;
98102
auto end = std::chrono::high_resolution_clock::now();
99103
result.value = value;
100104
result.time_searches = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
@@ -136,10 +140,10 @@ bool _less_than_or_equal(const distance_t distance, Curve const& curve1, Curve c
136140
for (curve_size_t i = 0; i < n1; ++i) {
137141
for (curve_size_t j = 0; j < n2; ++j) {
138142
if ((i < n1 - 1) and (j > 0)) {
139-
free_intervals1[j][i] = curve2[j].intersection_interval(dist_sqr, curve1[i], curve1[i+1]);
143+
free_intervals1[j][i] = curve2[j].ball_intersection_interval(dist_sqr, curve1[i], curve1[i+1]);
140144
}
141145
if ((j < n2 - 1) and (i > 0)) {
142-
free_intervals2[i][j] = curve1[i].intersection_interval(dist_sqr, curve2[j], curve2[j+1]);
146+
free_intervals2[i][j] = curve1[i].ball_intersection_interval(dist_sqr, curve2[j], curve2[j+1]);
143147
}
144148
}
145149
}
@@ -201,6 +205,40 @@ distance_t _greedy_upper_bound(const Curve &curve1, const Curve &curve2) {
201205
return std::sqrt(result);
202206
}
203207

208+
distance_t _projective_lower_bound(const Curve &curve1, const Curve &curve2) {
209+
std::vector<distance_t> distances1_sqr = std::vector<distance_t>(curve2.complexity() - 1), distances2_sqr = std::vector<distance_t>(curve1.complexity() + curve2.complexity() + 2);
210+
211+
for (curve_size_t i = 0; i < curve1.complexity(); ++i) {
212+
#pragma omp parallel for
213+
for (curve_size_t j = 0; j < curve2.complexity() - 1; ++j) {
214+
if (curve2[j].dist_sqr(curve2[j+1]) > 0) {
215+
distances1_sqr[j] = curve1[i].line_segment_dist_sqr(curve2[j], curve2[j+1]);
216+
} else {
217+
distances1_sqr[j] = curve1[i].dist_sqr(curve2[j]);
218+
}
219+
}
220+
distances2_sqr[i] = *std::min_element(distances1_sqr.begin(), distances1_sqr.end());
221+
}
222+
223+
distances1_sqr = std::vector<distance_t>(curve1.complexity() - 1);
224+
225+
for (curve_size_t i = 0; i < curve2.complexity(); ++i) {
226+
#pragma omp parallel for
227+
for (curve_size_t j = 0; j < curve1.complexity() - 1; ++j) {
228+
if (curve1[j].dist_sqr(curve1[j+1]) > 0) {
229+
distances1_sqr[j] = curve2[i].line_segment_dist_sqr(curve1[j], curve1[j+1]);
230+
} else {
231+
distances1_sqr[j] = curve2[i].dist_sqr(curve1[j]);
232+
}
233+
}
234+
distances2_sqr[curve1.complexity() + i] = *std::min_element(distances1_sqr.begin(), distances1_sqr.end());
235+
}
236+
237+
distances2_sqr[curve1.complexity() + curve2.complexity()] = curve1[0].dist_sqr(curve2[0]);
238+
distances2_sqr[curve1.complexity() + curve2.complexity() + 1] = curve1[curve1.complexity()-1].dist_sqr(curve2[curve2.complexity()-1]);
239+
return std::sqrt(*std::max_element(distances2_sqr.begin(), distances2_sqr.end()));
240+
}
241+
204242
} // end namespace Continuous
205243

206244
namespace Discrete {

0 commit comments

Comments
 (0)