1
1
# Sizes
# Each *_SIZE is an "AxB" string; the benchmark recipes turn the "x" into a
# comma before passing it to --size.
DISTANCES_SIZE = 1000x15000
REGRESSION_SIZE = 1000000x50
KMEANS_SAMPLES = 1000000
KMEANS_FEATURES = 50
KMEANS_SIZE = $(KMEANS_SAMPLES)x$(KMEANS_FEATURES)
SVM_SAMPLES = 10000
SVM_FEATURES = 1000
ITERATIONS = 10

# Bookkeeping options
# Simply-expanded (:=) on purpose: `date` must run exactly once so that every
# benchmark launched by this invocation reports the same batch ID.  With a
# recursive `=` the shell command is re-run on every expansion.
BATCH := $(shell date -Iseconds)
@@ -15,18 +17,21 @@ NUM_THREADS = -1
15
17
SVM_NUM_THREADS = 0
MULTIPLIER = 100
DATA_DIR = data/
# K-means input file; its name encodes the <samples>x<features> problem size.
KMEANS_DATA = data/kmeans_$(KMEANS_SIZE).npy

# A literal comma cannot be written inside a make function argument, so it is
# kept in a variable for use with $(subst x,$(comma),...).
comma = ,

ifneq ($(CONDA_PREFIX),)
# Append the conda library directory to the loader search path.  The existing
# value is only prepended when it is non-empty: a leading ':' would add an
# empty entry, which the dynamic loader treats as the current directory.
LD_LIBRARY_PATH := $(if $(LD_LIBRARY_PATH),$(LD_LIBRARY_PATH):)$(CONDA_PREFIX)/lib
export LD_LIBRARY_PATH
endif

# Make I_MPI_ROOT visible in the environment of the commands run by recipes.
export I_MPI_ROOT
27
30
28
31
# Run everything: the native benchmarks plus both Python suites.
all: native python

# Umbrella target for the Python benchmarks (scikit-learn and daal4py).
python: sklearn daal4py
34
+
30
35
native : data
31
36
git submodule init && git submodule update
32
37
@echo " # Compiling native benchmarks"
@@ -40,56 +45,114 @@ native: data
40
45
$(NUM_THREADS ) double $(REGRESSION_SIZE )
41
46
native/bin/linear $(BATCH ) $(HOST ) native linear \
42
47
$(NUM_THREADS ) double $(REGRESSION_SIZE )
43
- native/bin/kmeans $(BATCH ) $(HOST ) native kmeans.fit \
44
- $(NUM_THREADS ) double $(REGRESSION_SIZE ) $(DATA_DIR )
45
- native/bin/kmeans_predict $(BATCH ) $(HOST ) native kmeans.predict \
46
- $(NUM_THREADS ) double $(REGRESSION_SIZE ) $(DATA_DIR ) $(MULTIPLIER )
48
+ native/bin/kmeans $(BATCH ) $(HOST ) native kmeans \
49
+ $(NUM_THREADS ) double $(KMEANS_SIZE ) $(DATA_DIR ) $(MULTIPLIER )
47
50
native/bin/two_class_svm \
48
- --fileX data/two/X-$(SVM_VECTORS ) x$(SVM_FEATURES ) .npy.csv \
49
- --fileY data/two/y-$(SVM_VECTORS ) x$(SVM_FEATURES ) .npy.csv \
50
- --num-threads $(SVM_NUM_THREADS )
51
+ --fileX data/two/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
52
+ --fileY data/two/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
53
+ --num-threads $(SVM_NUM_THREADS ) --header
51
54
native/bin/multi_class_svm \
52
- --fileX data/multi/X-$(SVM_VECTORS ) x$(SVM_FEATURES ) .npy.csv \
53
- --fileY data/multi/y-$(SVM_VECTORS ) x$(SVM_FEATURES ) .npy.csv \
54
- --num-threads $(SVM_NUM_THREADS )
55
+ --fileX data/multi/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
56
+ --fileY data/multi/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
57
+ --num-threads $(SVM_NUM_THREADS ) --header
58
+ native/bin/log_reg_lbfgs \
59
+ --fileX data/two/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
60
+ --fileY data/two/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
61
+ --num-threads $(SVM_NUM_THREADS ) --header
62
+ native/bin/log_reg_lbfgs \
63
+ --fileX data/multi/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
64
+ --fileY data/multi/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
65
+ --num-threads $(SVM_NUM_THREADS ) --header
66
+ native/bin/decision_forest_clsf \
67
+ --fileX data/two/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
68
+ --fileY data/two/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
69
+ --num-threads $(SVM_NUM_THREADS ) --header
70
+ native/bin/decision_forest_clsf \
71
+ --fileX data/multi/X-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
72
+ --fileY data/multi/y-$(SVM_SAMPLES ) x$(SVM_FEATURES ) .npy \
73
+ --num-threads $(SVM_NUM_THREADS ) --header
55
74
56
# Run the scikit-learn benchmark scripts against the generated datasets.
# Distances/ridge/linear take only a problem size; k-means additionally reads
# its data (-x) and initial centroids (-i) from files.  The SVM,
# logistic-regression and decision-forest scripts are each run twice: once on
# the two-class dataset and once on the multi-class one.
sklearn: data
	@echo "# Running scikit-learn benchmarks"
	python sklearn/distances.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(DISTANCES_SIZE)) --iteration $(ITERATIONS)
	python sklearn/ridge.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(REGRESSION_SIZE)) --iteration $(ITERATIONS)
	python sklearn/linear.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(REGRESSION_SIZE)) --iteration $(ITERATIONS)
	python sklearn/kmeans.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(KMEANS_SIZE)) --iteration $(ITERATIONS) \
		-x $(KMEANS_DATA) -i $(basename $(KMEANS_DATA)).init.npy
	python sklearn/svm_bench.py --core-number $(NUM_THREADS) \
		--fileX data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/two/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python sklearn/svm_bench.py --core-number $(NUM_THREADS) \
		--fileX data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/multi/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python sklearn/log_reg.py --num-threads $(NUM_THREADS) \
		--fileX data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/two/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python sklearn/log_reg.py --num-threads $(NUM_THREADS) \
		--fileX data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/multi/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python sklearn/df_clsf.py --num-threads $(NUM_THREADS) \
		--fileX data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/two/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python sklearn/df_clsf.py --num-threads $(NUM_THREADS) \
		--fileX data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/multi/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
114
+
115
# Run the daal4py benchmark scripts against the generated datasets.
# Same command-line shape as the scikit-learn suite: size-only benchmarks
# first, then k-means reading its data (-x) and initial centroids (-i) from
# files, then SVM on the two-class and multi-class datasets.
daal4py: data
	@echo "# Running daal4py benchmarks"
	python daal4py/distances.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(DISTANCES_SIZE)) --iteration $(ITERATIONS)
	python daal4py/ridge.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(REGRESSION_SIZE)) --iteration $(ITERATIONS)
	python daal4py/linear.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(REGRESSION_SIZE)) --iteration $(ITERATIONS)
	python daal4py/kmeans.py --batchID $(BATCH) --arch $(HOST) \
		--prefix python --core-number $(NUM_THREADS) \
		--size $(subst x,$(comma),$(KMEANS_SIZE)) --iteration $(ITERATIONS) \
		-x $(KMEANS_DATA) -i $(basename $(KMEANS_DATA)).init.npy
	python daal4py/svm_bench.py --core-number $(NUM_THREADS) \
		--fileX data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/two/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
	python daal4py/svm_bench.py --core-number $(NUM_THREADS) \
		--fileX data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--fileY data/multi/y-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
		--header
81
138
82
139
# Aggregate target: every dataset the benchmarks read (k-means input plus the
# SVM/classification files).
data: $(KMEANS_DATA) svm_data
83
140
84
141
# Generate the k-means dataset with 10 clusters.  data/ is an order-only
# prerequisite: it must exist, but its timestamp never forces regeneration.
# The generator is handed extension-less names ($(basename $@) strips ".npy").
# NOTE(review): this presumes make_datasets.py appends ".npy" itself, so that
# -x yields $@ and -i yields the ".init.npy" file the benchmarks read - confirm.
$(KMEANS_DATA): | data/
	python make_datasets.py -f $(KMEANS_FEATURES) -s $(KMEANS_SAMPLES) \
		kmeans -c 10 -x $(basename $@) -i $(basename $@).init \
		-t $(basename $@).tol
145
+
146
# Aggregate target for the classification datasets (two-class and multi-class).
# Only the X files are listed; each rule writes the matching y file alongside.
svm_data: data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy \
          data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy
149
# Generate the two-class classification dataset.  The target is the X file;
# the y file is written next to it under the same name with "X-" replaced by
# "y-".  $(dir $@) already ends in '/', so no extra separator is added.
data/two/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy: | data/
	mkdir -p $(@D)  # in case the generator does not create data/two itself
	python make_datasets.py -f $(SVM_FEATURES) -s $(SVM_SAMPLES) \
		classification -c 2 -x $@ -y $(dir $@)$(subst X-,y-,$(notdir $@))
89
152
90
# Generate the multi-class (5 classes) classification dataset.  The target is
# the X file; the y file is written next to it under the same name with "X-"
# replaced by "y-".  $(dir $@) already ends in '/', so no extra separator is
# added.
data/multi/X-$(SVM_SAMPLES)x$(SVM_FEATURES).npy: | data/
	mkdir -p $(@D)  # in case the generator does not create data/multi itself
	python make_datasets.py -f $(SVM_FEATURES) -s $(SVM_SAMPLES) \
		classification -c 5 -x $@ -y $(dir $@)$(subst X-,y-,$(notdir $@))
93
156
94
157
# Create the dataset directory.  The dataset rules depend on it order-only,
# so it only has to exist.
data/:
	mkdir -p $@
@@ -100,4 +163,4 @@ clean:
100
163
$(MAKE ) -C native clean
101
164
rm -rf data
102
165
103
# Targets that name actions rather than files.
# NOTE(review): native_data and kmeans_data have no rule in the visible part
# of this file - confirm they are still needed.
.PHONY: native python sklearn daal4py all clean native_data data kmeans_data svm_data
0 commit comments