Skip to content

Commit 8445763

Browse files
authored
Merge pull request #162 from VarIr/performance
WIP Python performance
2 parents 9b499a6 + 6af1b3d commit 8445763

File tree

1 file changed

+32
-39
lines changed

1 file changed

+32
-39
lines changed

python/thundersvm/thundersvm.py

Lines changed: 32 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -88,18 +88,14 @@ def __del__(self):
8888
if self.model is not None:
8989
thundersvm.model_free(c_void_p(self.model))
9090

91-
def label_validate(self, y):
92-
93-
return column_or_1d(y, warn=True).astype(np.float64)
94-
9591
def fit(self, X, y):
9692
if self.model is not None:
9793
thundersvm.model_free(c_void_p(self.model))
9894
self.model = None
9995
sparse = sp.isspmatrix(X)
10096
self._sparse = sparse and not callable(self.kernel)
10197
X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
102-
y = self.label_validate(y)
98+
y = column_or_1d(y, warn=True).astype(np.float64)
10399

104100
solver_type = SVM_TYPE.index(self._impl)
105101

@@ -129,40 +125,39 @@ def fit(self, X, y):
129125
data_size = (c_int * 1)()
130126
sv_indices = (c_int * self.n_sv)()
131127
thundersvm.get_sv(csr_row, csr_col, csr_data, data_size, sv_indices, c_void_p(self.model))
132-
self.row = np.array([csr_row[index] for index in range(0, self.n_sv + 1)])
133-
self.col = np.array([csr_col[index] for index in range(0, data_size[0])])
134-
self.data = np.array([csr_data[index] for index in range(0, data_size[0])])
128+
self.row = np.frombuffer(csr_row, dtype=np.int32)
129+
self.col = np.frombuffer(csr_col, dtype=np.int32)[:data_size[0]]
130+
self.data = np.frombuffer(csr_data, dtype=np.float32)[:data_size[0]]
135131

136132
self.support_vectors_ = sp.csr_matrix((self.data, self.col, self.row))
137-
if self._sparse == False:
133+
if not self._sparse:
138134
self.support_vectors_ = self.support_vectors_.toarray(order='C')
139-
self.support_ = np.array([sv_indices[index] for index in range(0, self.n_sv)]).astype(int)
135+
self.support_ = np.frombuffer(sv_indices, dtype=np.int32).astype(int)
140136

141137
dual_coef = (c_float * ((self.n_classes - 1) * self.n_sv))()
142138
thundersvm.get_coef(dual_coef, self.n_classes, self.n_sv, c_void_p(self.model))
143139

144-
self.dual_coef_ = np.array([dual_coef[index] for index in range(0, (self.n_classes - 1) * self.n_sv)]).astype(
145-
float)
146-
self.dual_coef_ = np.reshape(self.dual_coef_, (self.n_classes - 1, self.n_sv))
140+
self.dual_coef_ = np.frombuffer(dual_coef, dtype=np.float32)\
141+
.astype(float)\
142+
.reshape((self.n_classes - 1, self.n_sv))
147143

148144
rho_size = int(self.n_classes * (self.n_classes - 1) / 2)
149145
self.n_binary_model = rho_size
150146
rho = (c_float * rho_size)()
151147
thundersvm.get_rho(rho, rho_size, c_void_p(self.model))
152-
self.intercept_ = np.array([rho[index] for index in range(0, rho_size)]).astype(float)
148+
self.intercept_ = np.frombuffer(rho, dtype=np.float32).astype(float)
153149

154150
if self.kernel == 'linear':
155151
coef = (c_float * (self.n_binary_model * self.n_features))()
156152
thundersvm.get_linear_coef(coef, self.n_binary_model, self.n_features, c_void_p(self.model))
157-
self.coef_ = np.array([coef[index] for index in range(0, self.n_binary_model * self.n_features)]).astype(
158-
float)
159-
self.coef_ = np.reshape(self.coef_, (self.n_binary_model, self.n_features))
153+
self.coef_ = np.frombuffer(coef, dtype=np.float32)\
154+
.astype(float)\
155+
.reshape((self.n_binary_model, self.n_features))
160156

161157
n_support_ = (c_int * self.n_classes)()
162158
thundersvm.get_support_classes(n_support_, self.n_classes, c_void_p(self.model))
163159

164-
self.n_support_ = np.array([n_support_[index] for index in range(0, self.n_classes)]).astype(int)
165-
160+
self.n_support_ = np.frombuffer(n_support_, dtype=np.int32).astype(int)
166161
self.shape_fit_ = X.shape
167162

168163
return self
@@ -325,8 +320,8 @@ def predict_proba(self, X):
325320
# c_void_p(self.model),
326321
# self.predict_label_ptr)
327322
thundersvm.get_pro(c_void_p(self.model), self.predict_pro_ptr)
328-
self.predict_prob = np.array([self.predict_pro_ptr[index] for index in range(0, size)])
329-
self.predict_prob = np.reshape(self.predict_prob, (samples, self.n_classes))
323+
self.predict_prob = np.frombuffer(self.predict_pro_ptr, dtype=np.float32)\
324+
.reshape((samples, self.n_classes))
330325
return self.predict_prob
331326

332327
def _dense_predict(self, X):
@@ -344,7 +339,7 @@ def _dense_predict(self, X):
344339
c_void_p(self.model),
345340
self.predict_label_ptr, self.verbose)
346341

347-
self.predict_label = np.array([self.predict_label_ptr[index] for index in range(0, X.shape[0])])
342+
self.predict_label = np.frombuffer(self.predict_label_ptr, dtype=np.float32)
348343
return self.predict_label
349344

350345
def _sparse_predict(self, X):
@@ -360,9 +355,7 @@ def _sparse_predict(self, X):
360355
c_void_p(self.model),
361356
self.predict_label_ptr, self.verbose)
362357

363-
predict_label = [self.predict_label_ptr[index] for index in range(0, X.shape[0])]
364-
365-
self.predict_label = np.asarray(predict_label)
358+
self.predict_label = np.frombuffer(self.predict_label_ptr, dtype=np.float32)
366359
return self.predict_label
367360

368361
def decision_function(self, X):
@@ -392,8 +385,9 @@ def _dense_decision_function(self, X):
392385
thundersvm.dense_decision(
393386
samples, features, data, c_void_p(self.model), dec_size, dec_value_ptr
394387
)
395-
self.dec_values = np.array([dec_value_ptr[index] for index in range(0, dec_size)]).astype(float)
396-
self.dec_values = np.reshape(self.dec_values, (X.shape[0], self.n_binary_model))
388+
self.dec_values = np.frombuffer(dec_value_ptr, dtype=np.float32)\
389+
.astype(float)\
390+
.reshape((X.shape[0], self.n_binary_model))
397391
return self.dec_values
398392

399393
def _sparse_decision_function(self, X):
@@ -409,8 +403,8 @@ def _sparse_decision_function(self, X):
409403
thundersvm.sparse_decision(
410404
X.shape[0], data, indptr, indices,
411405
c_void_p(self.model), dec_size, dec_value_ptr)
412-
self.dec_values = np.array([dec_value_ptr[index] for index in range(0, dec_size)])
413-
self.dec_values = np.reshape(self.dec_values, (X.shape[0], self.n_binary_model))
406+
self.dec_values = np.frombuffer(dec_value_ptr, dtype=np.float32)\
407+
.reshape((X.shape[0], self.n_binary_model))
414408
return self.dec_values
415409

416410
def save_to_file(self, path):
@@ -435,8 +429,7 @@ def load_from_file(self, path):
435429
self.n_classes = n_classes[0]
436430
n_support_ = (c_int * self.n_classes)()
437431
thundersvm.get_support_classes(n_support_, self.n_classes, c_void_p(self.model))
438-
self.n_support_ = np.array([n_support_[index] for index in range(0, self.n_classes)]).astype(int)
439-
432+
self.n_support_ = np.frombuffer(n_support_, dtype=np.int32).astype(int)
440433
self.n_sv = thundersvm.n_sv(c_void_p(self.model))
441434

442435
n_feature = (c_int * 1)()
@@ -448,24 +441,24 @@ def load_from_file(self, path):
448441
data_size = (c_int * 1)()
449442
sv_indices = (c_int * self.n_sv)()
450443
thundersvm.get_sv(csr_row, csr_col, csr_data, data_size, sv_indices, c_void_p(self.model))
451-
self.row = np.array([csr_row[index] for index in range(0, self.n_sv + 1)])
452-
self.col = np.array([csr_col[index] for index in range(0, data_size[0])])
453-
self.data = np.array([csr_data[index] for index in range(0, data_size[0])])
444+
self.row = np.frombuffer(csr_row, dtype=np.int32)
445+
self.col = np.frombuffer(csr_col, dtype=np.int32)[:data_size[0]]
446+
self.data = np.frombuffer(csr_data, dtype=np.float32)[:data_size[0]]
454447
self.support_vectors_ = sp.csr_matrix((self.data, self.col, self.row))
455448
# if self._sparse == False:
456449
# self.support_vectors_ = self.support_vectors_.toarray(order = 'C')
457-
self.support_ = np.array([sv_indices[index] for index in range(0, self.n_sv)]).astype(int)
450+
self.support_ = np.frombuffer(sv_indices, dtype=np.int32)
458451
dual_coef = (c_float * ((self.n_classes - 1) * self.n_sv))()
459452
thundersvm.get_coef(dual_coef, self.n_classes, self.n_sv, c_void_p(self.model))
460-
self.dual_coef_ = np.array([dual_coef[index] for index in range(0, (self.n_classes - 1) * self.n_sv)]).astype(
461-
float)
462-
self.dual_coef_ = np.reshape(self.dual_coef_, (self.n_classes - 1, self.n_sv))
453+
self.dual_coef_ = np.frombuffer(dual_coef, dtype=np.float32)\
454+
.astype(float)\
455+
.reshape((self.n_classes - 1, self.n_sv))
463456

464457
rho_size = int(self.n_classes * (self.n_classes - 1) / 2)
465458
self.n_binary_model = rho_size
466459
rho = (c_float * rho_size)()
467460
thundersvm.get_rho(rho, rho_size, c_void_p(self.model))
468-
self.intercept_ = np.array([rho[index] for index in range(0, rho_size)]).astype(float)
461+
self.intercept_ = np.frombuffer(rho, dtype=np.float32).astype(float)
469462

470463
# if self.kernel == 'linear':
471464
# coef = (c_float * (self.n_binary_model * self.n_sv))()

0 commit comments

Comments
 (0)