Skip to content

Commit a464444

Browse files
committed
Switch to PyBind11
1 parent e70b773 commit a464444

23 files changed

+209
-292
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "pybind11"]
2+
path = pybind11
3+
url = https://github.com/pybind/pybind11.git

CMakeLists.txt

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ project(Fred LANGUAGES CXX C)
33

44
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
55
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast")
6-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared")
6+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
77
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
88
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") #supress error in older gcc
99
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
@@ -12,24 +12,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-trapping-math")
1212
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize")
1313
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-vec")
1414
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-loop")
15-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--no-undefined")
16-
#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++ -static")
1715

1816
include_directories(${CMAKE_SOURCE_DIR}/include)
1917

20-
find_package(PythonInterp REQUIRED)
21-
find_package(PythonLibs REQUIRED)
22-
find_package(Boost 1.63 COMPONENTS system chrono ${BPY} ${BNPY} REQUIRED)
2318
find_package(OpenMP REQUIRED)
2419

2520
add_definitions(-D_GLIBCXX_PARALLEL)
2621

27-
include_directories(${Boost_INCLUDE_DIRS})
28-
include_directories(${PYTHON_INCLUDE_DIRS})
29-
30-
link_libraries(${Boost_LIBRARIES})
31-
link_libraries(${PYTHON_LIBRARIES})
32-
22+
find_package(OpenMP)
3323
if(OpenMP_CXX_FOUND)
3424
link_libraries(OpenMP::OpenMP_CXX)
3525
endif()
@@ -40,10 +30,13 @@ if(NOT TARGET OpenMP::OpenMP_CXX)
4030
PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
4131
set_property(TARGET OpenMP::OpenMP_CXX
4232
PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
43-
link_libraries(OpenMP::OpenMP_CXX)
33+
4434
endif()
35+
link_libraries(OpenMP::OpenMP_CXX)
36+
37+
add_subdirectory(pybind11)
4538

46-
PYTHON_ADD_MODULE(backend
39+
pybind11_add_module(backend
4740
src/fred_python_wrapper.cpp
4841
src/curve.cpp
4942
src/point.cpp
File renamed without changes.
1.52 KB
Binary file not shown.
File renamed without changes.
File renamed without changes.

Makefile

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1+
all: pre install
2+
13
pre:
2-
sudo apt install -y libboost-all-dev
3-
sudo apt-get install -y python3-setuptools
4-
sudo apt-get install -y python3-numpy
5-
sudo apt-get install -y python3-matplotlib
6-
sudo apt-get install -y cmake
4+
git submodule init
5+
git submodule update
76

87
install:
9-
cd py && /usr/bin/python3 ./setup.py install --user
8+
python setup.py install --user
109

1110
clean:
12-
rm -r py/dist py/build/ py/Fred.egg-info/
13-
pip3 uninstall Fred -y
11+
rm -r dist build/ Fred_Frechet.egg-info/ & pip uninstall Fred-Frechet -y
1412

README.md

Lines changed: 18 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Fred ![alt text](https://raw.githubusercontent.com/derohde/Fred/master/logo/logo.png "Fred logo")
22
A fast, scalable and light-weight C++ Fréchet distance library, exposed to Python and focused on (k,l)-clustering of polygonal curves.
33

4+
### NOW USING PYBIND11 INSTEAD OF BOOST!
5+
46
## Ingredients C++ Backend
57
`import Fred.backend as fred`
68

@@ -52,55 +54,31 @@ By default, Fred will automatically determine the number of threads to use. If y
5254

5355
A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.discrete_klcenter` and `fred.discrete_klmedian`. As the name suggests, it stores the distances already computed.
5456

55-
#### discrete (k,l)-center clustering (continuous Fréchet) -- multiple calls
57+
#### discrete (k,l)-center clustering (continuous Fréchet)
5658
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
5759
- signature: `fred.discrete_klcenter(k, l, curves, distances, center_domain, random_first_center)` with parameters
5860
- `k`: number of centers
5961
- `l`: maximum complexity of the centers, only used when center_domain is default value
60-
- `distances`: `fred.Distance_Matrix`
62+
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
6163
- `center_domain`: possible centers, defaults to empty `fred.Curves()`, in this case the input is simplified and used as center domain
6264
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
6365
- returns: `fred.Clustering_Result` with members
6466
- `value`: objective value
6567
- `time`: running-time
6668
- `assignment`: empty if compute_assignment has not been called
6769

68-
#### discrete (k,l)-median clustering (continuous Fréchet) -- multiple calls
70+
#### discrete (k,l)-median clustering (continuous Fréchet)
6971
- Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification
7072
- signature: `fred.discrete_klmedian(k, l, curves, distances, center_domain)` with parameters
7173
- `k`: number of centers
7274
- `l`: maximum complexity of the centers, only used when center_domain is default value
73-
- `distances`: `fred.Distance_Matrix`
74-
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
75-
- returns: `fred.Clustering_Result` with mebers
76-
- `value`: objective value
77-
- `time`: running-time
78-
- `assignment`: empty if compute_assignment has not been called
79-
80-
#### discrete (k,l)-center clustering (continuous Fréchet) -- oneshot
81-
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
82-
- signature: `fred.discrete_klcenter(k, l, curves, center_domain, random_first_center)` with parameters
83-
- `k`: number of centers
84-
- `l`: maximum complexity of the centers, only used when center_domain is default value
85-
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
86-
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
87-
- returns: `fred.Clustering_Result` with mebers
88-
- `value`: objective value
89-
- `time`: running-time
90-
- `assignment`: empty if compute_assignment has not been called
91-
92-
#### discrete (k,l)-median clustering (continuous Fréchet) -- oneshot
93-
- Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification
94-
- signature: `fred.discrete_klmedian(k, l, curves, center_domain)` with parameters
95-
- `k`: number of centers
96-
- `l`: maximum complexity of the centers, only used when center_domain is default value
75+
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
9776
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
9877
- returns: `fred.Clustering_Result` with members
9978
- `value`: objective value
10079
- `time`: running-time
10180
- `assignment`: empty if compute_assignment has not been called
10281

103-
10482
#### Clustering Result
10583
- signature: `fred.Clustering_Result`
10684
- methods: `len(fred.Clustering_Result)`: number of centers, `fred.Clustering_Result[i]`: get ith center, `fred.Clustering_Result.compute_assignment(fred.Curves)`: assigns every curve to its nearest center
@@ -112,27 +90,23 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
11290

11391
### Dimension Reduction via Gaussian Random Projection
11492
- [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves)
115-
- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate)
93+
- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate), defaults to `True`
11694
- returns: `fred.Curves` collection of curves
11795

11896
## Installation
119-
Get requirements under Ubuntu: `make pre`
120-
121-
Python3 installation into userdir: `make install`
12297

123-
### If something does not work with Boost
98+
### Requirements
12499

125-
Manual installation of Boost
100+
You need to have the following installed:
101+
- git
102+
- OpenMP (should be part of your compiler)
103+
104+
That's it!
126105

127-
- `mkdir $HOME/boost` (This folder is hardcoded in setup.py, another location won't work.)
128-
- `cd /tmp`
129-
- `wget https://dl.bintray.com/boostorg/release/1.73.0/source/boost_1_73_0.tar.gz`
130-
- `tar -xzf boost_1_73_0.tar.gz`
131-
- `cd boost_1_73_0`
132-
- `./bootstrap.sh --with-python=/usr/bin/python3`
133-
- `./b2 install --prefix=$HOME/boost`
106+
### Installation Procedure
134107

135-
After that, go back to Freds folder and run `make clean` and then `make install`
108+
- Variant 1: simply run `pip install git+https://github.com/derohde/Fred`
109+
- Variant 2: clone repository and run `make` for installation into userdir
136110

137111
## Test
138112
Just run `python py/test.py`.
@@ -213,10 +187,10 @@ dm = fred.Distance_Matrix() # computing the Fréchet distance is costly,
213187

214188
for k in range(2, 6):
215189

216-
clustering = fred.discrete_klcenter_multi(k, 10, curves, dm)
190+
clustering = fred.discrete_klcenter(k, 10, curves, dm)
217191
print("clustering cost is {}".format(clustering.value))
218192

219-
clustering = fred.discrete_klmedian_multi(k, 10, curves, dm)
193+
clustering = fred.discrete_klmedian(k, 10, curves, dm)
220194
print("clustering cost is {}".format(clustering.value))
221195

222196
clustering.compute_assignment(curves)

include/clustering.hpp

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
1111
#pragma once
1212

1313
#include <unordered_map>
14-
15-
#include <boost/chrono/include.hpp>
14+
#include <chrono>
1615

1716
#include "random.hpp"
1817
#include "curve.hpp"
@@ -132,7 +131,7 @@ struct Clustering_Result {
132131

133132
Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool arya = false, const Curves &center_domain = Curves(), const bool random_start_center = true) {
134133

135-
const auto start = boost::chrono::process_real_cpu_clock::now();
134+
const auto start = std::chrono::high_resolution_clock::now();
136135
Clustering_Result result;
137136

138137
if (in.empty()) return result;
@@ -233,10 +232,10 @@ Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t
233232
Curves simpl_centers;
234233
for (const auto center: centers) simpl_centers.push_back(simplified_in[center]);
235234

236-
auto end = boost::chrono::process_real_cpu_clock::now();
235+
auto end = std::chrono::high_resolution_clock::now();
237236
result.centers = simpl_centers;
238237
result.value = curr_maxdist;
239-
result.running_time = (end-start).count() / 1000000000.0;
238+
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
240239
return result;
241240
}
242241

@@ -245,7 +244,7 @@ Clustering_Result arya(const curve_number_t num_centers, const curve_size_t ell,
245244
}
246245

247246
Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, const double epsilon, const Curves &center_domain = Curves()) {
248-
const auto start = boost::chrono::process_real_cpu_clock::now();
247+
const auto start = std::chrono::high_resolution_clock::now();
249248
Clustering_Result result;
250249
std::vector<curve_number_t> centers;
251250
const Curves &simplified_in = center_domain;
@@ -296,15 +295,15 @@ Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in,
296295
}
297296
centers.push_back(best_candidate);
298297

299-
auto end = boost::chrono::process_real_cpu_clock::now();
298+
auto end = std::chrono::high_resolution_clock::now();
300299
result.centers.push_back(simplified_in[centers[0]]);
301300
result.value = _center_cost_sum(in, simplified_in, centers, distances);
302-
result.running_time = (end-start).count() / 1000000000.0;
301+
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
303302
return result;
304303
}
305304

306305
Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in, const Curves &center_domain = Curves()) {
307-
const auto start = boost::chrono::process_real_cpu_clock::now();
306+
const auto start = std::chrono::high_resolution_clock::now();
308307
Clustering_Result result;
309308
std::vector<curve_number_t> centers;
310309
const Curves &simplified_in = center_domain;
@@ -344,15 +343,15 @@ Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in
344343
}
345344
centers.push_back(best_candidate);
346345

347-
auto end = boost::chrono::process_real_cpu_clock::now();
346+
auto end = std::chrono::high_resolution_clock::now();
348347
result.centers.push_back(simplified_in[centers[0]]);
349348
result.value = best_objective_value;
350-
result.running_time = (end-start).count() / 1000000000.0;
349+
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
351350
return result;
352351
}
353352

354353
Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_assignment = false) {
355-
const auto start = boost::chrono::process_real_cpu_clock::now();
354+
const auto start = std::chrono::high_resolution_clock::now();
356355
Clustering_Result result;
357356

358357
const auto n = in.size();
@@ -431,15 +430,15 @@ Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_a
431430
for (const auto &p : S1) cost += p.dist(mu1);
432431
for (const auto &p : S2) cost += p.dist(mu2);
433432

434-
auto end = boost::chrono::process_real_cpu_clock::now();
433+
auto end = std::chrono::high_resolution_clock::now();
435434
result.centers.push_back(center_curve);
436435
result.value = cost;
437-
result.running_time = (end-start).count() / 1000000000.0;
436+
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
438437
return result;
439438
}
440439

441440
Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool with_assignment = false) {
442-
const auto start = boost::chrono::process_real_cpu_clock::now();
441+
const auto start = std::chrono::high_resolution_clock::now();
443442
Clustering_Result result;
444443
Curve best_center(in.dimensions());
445444
const auto infty = std::numeric_limits<distance_t>::infinity();
@@ -508,10 +507,10 @@ Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool
508507
}
509508
}
510509

511-
auto end = boost::chrono::process_real_cpu_clock::now();
510+
auto end = std::chrono::high_resolution_clock::now();
512511
result.centers.push_back(best_center);
513512
result.value = best;
514-
result.running_time = (end-start).count() / 1000000000.0;
513+
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
515514
return result;
516515
}
517516

include/coreset.hpp

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
99
*/
1010
#pragma once
1111

12-
#include <boost/python.hpp>
13-
#include <boost/python/numpy.hpp>
14-
1512
#include "types.hpp"
1613
#include "clustering.hpp"
1714
#include "frechet.hpp"
1815

19-
namespace np = boost::python::numpy;
20-
namespace p = boost::python;
21-
2216
namespace Coreset {
2317

2418
class Onemedian_Coreset {
@@ -62,31 +56,31 @@ class Onemedian_Coreset {
6256
}
6357
}
6458

65-
inline np::ndarray get_lambda() const {
66-
np::dtype dt = np::dtype::get_builtin<distance_t>();
67-
p::list l;
68-
np::ndarray result = np::array(l, dt);
69-
for (const auto &elem: lambda) {
70-
l.append(elem);
71-
}
72-
result = np::array(l, dt);
73-
return result;
74-
}
59+
// inline np::ndarray get_lambda() const {
60+
// np::dtype dt = np::dtype::get_builtin<distance_t>();
61+
// p::list l;
62+
// np::ndarray result = np::array(l, dt);
63+
// for (const auto &elem: lambda) {
64+
// l.append(elem);
65+
// }
66+
// result = np::array(l, dt);
67+
// return result;
68+
// }
7569

7670
inline distance_t get_Lambda() const {
7771
return Lambda;
7872
}
7973

80-
inline np::ndarray get_curves() const {
81-
np::dtype dt = np::dtype::get_builtin<curve_number_t>();
82-
p::list l;
83-
np::ndarray result = np::array(l, dt);
84-
for (const auto &elem: coreset) {
85-
l.append(elem);
86-
}
87-
result = np::array(l, dt);
88-
return result;
89-
}
74+
// inline np::ndarray get_curves() const {
75+
// np::dtype dt = np::dtype::get_builtin<curve_number_t>();
76+
// p::list l;
77+
// np::ndarray result = np::array(l, dt);
78+
// for (const auto &elem: coreset) {
79+
// l.append(elem);
80+
// }
81+
// result = np::array(l, dt);
82+
// return result;
83+
// }
9084

9185
inline distance_t get_cost() const {
9286
return cost;

0 commit comments

Comments
 (0)