This repository was archived by the owner on Dec 6, 2023. It is now read-only.

Commit 171fa7b

Cache kernel derivative, benchmark against fastFM
1 parent 195f4cd commit 171fa7b
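
The commit title covers two changes: the kernel-derivative caching lives in the CD solver sources (the other files touched by this commit, not reproduced below), while the new benchmark script compares against fastFM. For context, the caching idea for a degree-2 factorization machine is that the derivative of the ANOVA kernel with respect to one factor weight p_sj needs only x_j and the per-sample inner product d_s = <p_s, x>, so keeping d_s cached and updating it incrementally avoids a full pass over the features for every coordinate update. The snippet below is a rough illustrative sketch of that bookkeeping with made-up variable names; it is not the solver code from this commit.

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(5)      # one sample
p_s = rng.randn(5)    # one row (component s) of the factor matrix P

# Cache the per-sample inner product once.
d_s = np.dot(p_s, x)

# Coordinate update of p_s[j]: the degree-2 ANOVA kernel derivative
# d/dp_sj [((p_s . x)**2 - sum_i p_si**2 * x_i**2) / 2] = x_j * d_s - p_sj * x_j**2
# needs only the cached d_s and x[j].
j = 2
grad_j = x[j] * d_s - p_s[j] * x[j] ** 2
p_old = p_s[j]
p_s[j] -= 0.1 * grad_j    # illustrative step, not the solver's actual update rule

# Maintain the cache incrementally instead of recomputing the dot product.
d_s += (p_s[j] - p_old) * x[j]
assert np.isclose(d_s, np.dot(p_s, x))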

File tree

3 files changed: +2940 −2539 lines changed


benchmarks/bench_other_libs.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
""" Benchmarking CD solvers for factorization machines.

Compares polylearn with fastFM [1].

[1] http://ibayer.github.io/fastFM/

Note: this benchmark uses the squared loss and a regression formulation, for
the fairest comparison. The CD solvers in polylearn support logistic loss and
squared hinge loss as well.

"""

from time import time

import numpy as np
import scipy.sparse as sp

from sklearn.metrics import accuracy_score, f1_score
from sklearn.datasets import fetch_20newsgroups_vectorized

from polylearn import FactorizationMachineRegressor
if __name__ == '__main__':
    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")
    X_train = sp.csc_matrix(data_train.data)
    X_test = sp.csc_matrix(data_test.data)

    y_train = data_train.target == 0  # atheism vs rest
    y_test = data_test.target == 0

    y_train = (2 * y_train - 1).astype(np.float64)

    print(__doc__)
    print("20 newsgroups")
    print("=============")
    print("X_train.shape = {0}".format(X_train.shape))
    print("X_train.format = {0}".format(X_train.format))
    print("X_train.dtype = {0}".format(X_train.dtype))
    print("X_train density = {0}"
          "".format(X_train.nnz / np.prod(X_train.shape)))
    print("y_train {0}".format(y_train.shape))
    print("X_test {0}".format(X_test.shape))
    print("X_test.format = {0}".format(X_test.format))
    print("X_test.dtype = {0}".format(X_test.dtype))
    print("y_test {0}".format(y_test.shape))
    print()

    print("Training regressors")
    print("===================")
    f1, accuracy, train_time, test_time = {}, {}, {}, {}

    print("Training our solver... ", end="")
    fm = FactorizationMachineRegressor(n_components=20,
                                       fit_linear=True,
                                       fit_lower=False,
                                       alpha=5,
                                       beta=5,
                                       degree=2,
                                       random_state=0,
                                       max_iter=100)
    t0 = time()
    fm.fit(X_train, y_train)
    train_time['polylearn'] = time() - t0
    t0 = time()
    y_pred = fm.predict(X_test) > 0
    test_time['polylearn'] = time() - t0
    accuracy['polylearn'] = accuracy_score(y_test, y_pred)
    f1['polylearn'] = f1_score(y_test, y_pred)
    print("done")

    try:
        from fastFM import als

        print("Training fastfm... ", end="")
        clf = als.FMRegression(n_iter=100, init_stdev=0.01, rank=20,
                               random_state=0, l2_reg=10.)
        clf.ignore_w_0 = True  # since polylearn has no fit_intercept yet
        t0 = time()

        clf.fit(X_train, y_train)
        train_time['fastfm'] = time() - t0

        t0 = time()
        y_pred = clf.predict(X_test)
        test_time['fastfm'] = time() - t0
        y_pred = y_pred > 0
        accuracy['fastfm'] = accuracy_score(y_test, y_pred)
        f1['fastfm'] = f1_score(y_test, y_pred)

        print("done")
    except ImportError:
        print("fastfm not found")

    print("Regression performance:")
    print("=======================")
    print()
    print("%s %s %s %s %s" % ("Model".ljust(16),
                              "train".rjust(10),
                              "test".rjust(10),
                              "f1".rjust(10),
                              "accuracy".rjust(10)))
    print("-" * (16 + 4 * 11))
    for name in sorted(f1, key=f1.get):
        print("%s %s %s %s %s" % (
            name.ljust(16),
            ("%.4fs" % train_time[name]).rjust(10),
            ("%.4fs" % test_time[name]).rjust(10),
            ("%.4f" % f1[name]).rjust(10),
            ("%.4f" % accuracy[name]).rjust(10)))

    print()
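
As the docstring notes, the benchmark sticks to squared loss and a regression formulation for the fairest comparison, but polylearn's CD solvers also handle classification losses. A minimal sketch of running the same experiment with the classifier follows; it assumes FactorizationMachineClassifier mirrors the regressor's constructor and accepts a loss parameter ('squared_hinge' or 'logistic'), so check the polylearn docs before relying on the exact signature.

from polylearn import FactorizationMachineClassifier

fm_clf = FactorizationMachineClassifier(n_components=20,
                                        loss='squared_hinge',
                                        fit_linear=True,
                                        fit_lower=False,
                                        alpha=5,
                                        beta=5,
                                        degree=2,
                                        random_state=0,
                                        max_iter=100)
# Boolean labels can be passed directly; no +/-1 recoding is needed here.
# fm_clf.fit(X_train, y_train > 0)
# y_pred = fm_clf.predict(X_test)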
