Skip to content
This repository was archived by the owner on Dec 6, 2023. It is now read-only.

Commit 195f4cd

Browse files
committed
Speedup: switch to typed memoryviews
- Replace sizes with Py_ssize_t.
- Fully remove vestiges of compute_loss.
- Add benchmark script.
- Use cdivision in loss.
- FIX Windows failures: memory wasn't zeroed :-o
1 parent e0d79c2 commit 195f4cd

14 files changed

+44891
-11204
lines changed

appveyor.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ environment:
2828
PYTHON_ARCH: "64"
2929
MINICONDA: "C:\\Miniconda35-x64"
3030

31-
32-
3331
install:
3432
# Miniconda is pre-installed in the worker build
3533
- "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%"

benchmarks/bench_20newsgroups.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Benchmark polynomial classifiers on bag-of-words text classification
2+
# Inspired by: https://github.com/scikit-learn/scikit-learn/blob/master
3+
# /benchmarks/bench_20newsgroups.py
4+
5+
from time import time
6+
7+
import numpy as np
8+
import scipy.sparse as sp
9+
10+
from sklearn.base import clone
11+
from sklearn.metrics import accuracy_score, f1_score
12+
from sklearn.datasets import fetch_20newsgroups_vectorized
13+
14+
from polylearn import (FactorizationMachineClassifier,
15+
PolynomialNetworkClassifier)
16+
17+
18+
# Registry of the classifiers to benchmark, keyed by a short display name.
# The numeric suffix in each key is the polynomial degree of the model.
estimators = dict()

estimators['fm-2'] = FactorizationMachineClassifier(
    n_components=30,
    fit_linear=False,
    fit_lower=None,
    degree=2,
    random_state=0,
    max_iter=10)

estimators['polynet-2'] = PolynomialNetworkClassifier(
    n_components=15,
    degree=2,
    fit_lower=None,
    max_iter=10,
    random_state=0)

# The degree-3 variants are unfitted clones of the degree-2 estimators with
# only the listed parameters overridden (set_params mutates in place and
# returns the same estimator, so the two-step form is equivalent to chaining).
estimators['fm-3'] = clone(estimators['fm-2'])
estimators['fm-3'].set_params(degree=3)

estimators['polynet-3'] = clone(estimators['polynet-2'])
estimators['polynet-3'].set_params(degree=3, n_components=10)
35+
36+
if __name__ == '__main__':
    # Benchmark driver: fit every entry of ``estimators`` on the vectorized
    # 20 newsgroups training split, predict on the test split, and print a
    # table of train time, test time, F1 and accuracy per classifier.

    # Load both splits and convert the feature matrices to CSC format.
    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")
    X_train = sp.csc_matrix(data_train.data)
    X_test = sp.csc_matrix(data_test.data)

    # Binarize the multiclass target: class 0 against all other classes.
    y_train = data_train.target == 0  # atheism vs rest
    y_test = data_test.target == 0

    print("20 newsgroups")
    print("=============")
    print("X_train.shape = {0}".format(X_train.shape))
    print("X_train.format = {0}".format(X_train.format))
    print("X_train.dtype = {0}".format(X_train.dtype))
    # np.prod replaces the np.product alias, which was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0.
    print("X_train density = {0}"
          "".format(X_train.nnz / np.prod(X_train.shape)))
    print("y_train {0}".format(y_train.shape))
    print("X_test {0}".format(X_test.shape))
    print("X_test.format = {0}".format(X_test.format))
    print("X_test.dtype = {0}".format(X_test.dtype))
    print("y_test {0}".format(y_test.shape))
    print()

    print("Classifier Training")
    print("===================")
    # Per-classifier results, all keyed by the estimator's display name.
    f1, accuracy, train_time, test_time = {}, {}, {}, {}

    # Iterate in name order so the training log is deterministic.
    for name, clf in sorted(estimators.items()):
        # Flush so the progress message is visible while fit() is running;
        # with end="" a line-buffered stdout would otherwise hold it back
        # until "done" is printed.
        print("Training %s ... " % name, end="", flush=True)
        t0 = time()
        clf.fit(X_train, y_train)
        train_time[name] = time() - t0
        t0 = time()
        y_pred = clf.predict(X_test)
        test_time[name] = time() - t0
        accuracy[name] = accuracy_score(y_test, y_pred)
        f1[name] = f1_score(y_test, y_pred)
        print("done")

    print("Classification performance:")
    print("===========================")
    print()
    print("%s %s %s %s %s" % ("Classifier".ljust(16),
                              "train".rjust(10),
                              "test".rjust(10),
                              "f1".rjust(10),
                              "accuracy".rjust(10)))
    # Rule width: a 16-char name column plus four 10-char columns, each
    # preceded by a single separating space.
    print("-" * (16 + 4 * 11))
    # Rows are ordered by ascending F1 score (best classifier last).
    for name in sorted(f1, key=f1.get):
        print("%s %s %s %s %s" % (
            name.ljust(16),
            ("%.4fs" % train_time[name]).rjust(10),
            ("%.4fs" % test_time[name]).rjust(10),
            ("%.4f" % f1[name]).rjust(10),
            ("%.4f" % accuracy[name]).rjust(10)))

    print()

0 commit comments

Comments
 (0)