-
Notifications
You must be signed in to change notification settings - Fork 88
Expand file tree
/
Copy pathmf.py
More file actions
86 lines (77 loc) · 3.09 KB
/
mf.py
File metadata and controls
86 lines (77 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import time
def load_matrix(filename, num_users, num_items):
    """Load a tab-separated count file into a scaled sparse CSR matrix.

    Each non-blank line of ``filename`` must hold three tab-separated
    fields: ``user``, ``item`` and ``count``. ``user`` and ``item`` are
    parsed as integers and used directly as row / column indices;
    ``count`` is parsed as a float.

    Lines are ignored when the user or item index falls outside
    ``[0, num_users)`` / ``[0, num_items)`` or when the count is zero.
    If the same (user, item) pair occurs more than once, the last nonzero
    count wins.

    After loading, every stored count is multiplied by
    ``alpha = (number of empty cells) / (sum of stored counts)``.

    Args:
        filename: Path of the text file to read.
        num_users: Number of rows of the resulting matrix.
        num_items: Number of columns of the resulting matrix.

    Returns:
        A ``(num_users, num_items)`` CSR matrix of the scaled counts.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            tab-separated fields, or a field cannot be parsed as a number.
        ZeroDivisionError: If the stored counts sum to zero (for example
            an empty file), because ``alpha`` is then undefined.
    """
    t0 = time.time()
    counts = sparse.dok_matrix((num_users, num_items), dtype=float)

    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open(filename, 'r') as handle:
        for i, line in enumerate(handle):
            fields = line.strip()
            if not fields:
                # Tolerate blank lines (typically a trailing newline).
                continue
            user, item, count = fields.split('\t')
            user = int(user)
            item = int(item)
            count = float(count)
            # Reject negative indices as well: they would otherwise wrap
            # around and silently land in the wrong row / column.
            if not 0 <= user < num_users:
                continue
            if not 0 <= item < num_items:
                continue
            if count != 0:
                counts[user, item] = count
            if i % 100000 == 0:
                print('loaded %i counts...' % i)

    counts = counts.tocsr()
    # Derive the statistics from the matrix itself rather than from a
    # running tally, so duplicate (user, item) lines cannot make ``total``
    # and ``num_zeros`` disagree with what is actually stored.
    total = float(counts.sum())
    num_zeros = num_users * num_items - counts.nnz
    if total == 0:
        raise ZeroDivisionError(
            'cannot compute alpha: counts loaded from %r sum to zero'
            % (filename,))
    alpha = num_zeros / total
    print('alpha %.2f' % alpha)
    counts = counts * alpha
    t1 = time.time()
    print('Finished loading matrix in %f seconds' % (t1 - t0))
    return counts
class ImplicitMF(object):
    """Factorize a sparse count matrix by alternating regularized solves.

    The model keeps one dense vector of length ``num_factors`` per user
    (row of ``counts``) and per item (column of ``counts``). Training
    alternates between solving for all user vectors with the item vectors
    held fixed and solving for all item vectors with the user vectors held
    fixed.

    NOTE(review): the per-row system solved in ``iteration`` looks like the
    confidence-weighted implicit-feedback formulation, with ``counts``
    already scaled by the confidence factor -- confirm against the caller.
    """

    def __init__(self, counts, num_factors=40, num_iterations=30,
                 reg_param=0.8):
        """Store the training data and hyper-parameters.

        Args:
            counts: Sparse (num_users x num_items) matrix of counts.
            num_factors: Length of each user / item vector.
            num_iterations: Number of user+item passes in ``train_model``.
            reg_param: Weight of the identity term added to every
                per-row linear system.
        """
        self.counts = counts
        self.num_users = counts.shape[0]
        self.num_items = counts.shape[1]
        self.num_factors = num_factors
        self.num_iterations = num_iterations
        self.reg_param = reg_param

    def train_model(self):
        """Fit ``self.user_vectors`` and ``self.item_vectors``.

        Both factor matrices start from standard-normal random values and
        are then refined for ``num_iterations`` rounds, users first and
        items second within each round.
        """
        self.user_vectors = np.random.normal(size=(self.num_users,
                                                   self.num_factors))
        self.item_vectors = np.random.normal(size=(self.num_items,
                                                   self.num_factors))

        for i in range(self.num_iterations):
            t0 = time.time()
            print('Solving for user vectors...')
            self.user_vectors = self.iteration(
                True, sparse.csr_matrix(self.item_vectors))
            print('Solving for item vectors...')
            self.item_vectors = self.iteration(
                False, sparse.csr_matrix(self.user_vectors))
            t1 = time.time()
            print('iteration %i finished in %f seconds' % (i + 1, t1 - t0))

    def iteration(self, user, fixed_vecs):
        """Solve for one side's vectors while the other side is fixed.

        For every row ``c`` being solved (a user's counts over items, or
        an item's counts over users) this solves

            (Y^T Y + Y^T diag(c) Y + reg_param * I) x = Y^T (diag(c) + I) p

        where ``Y`` is ``fixed_vecs`` and ``p`` is 1 where ``c`` is
        nonzero and 0 elsewhere.

        Args:
            user: True to solve for user vectors (``fixed_vecs`` are the
                item vectors), False to solve for item vectors.
            fixed_vecs: Factor matrix of the side held fixed, one row per
                fixed entity. Sparse or dense input is accepted.

        Returns:
            Dense array of shape (number of solved entities, num_factors).
        """
        # Accept dense input too; a CSR argument is reused without a
        # format conversion.
        fixed_vecs = sparse.csr_matrix(fixed_vecs)
        num_solve = self.num_users if user else self.num_items
        num_fixed = fixed_vecs.shape[0]
        fixed_t = fixed_vecs.T
        YTY = fixed_t.dot(fixed_vecs)
        eye = sparse.eye(num_fixed)
        lambda_eye = self.reg_param * sparse.eye(self.num_factors)
        solve_vecs = np.zeros((num_solve, self.num_factors))

        # Lay the counts out once so that every entity being solved is a
        # row of a CSR matrix; slicing columns out of a CSR matrix inside
        # the loop is needlessly slow for the item pass.
        if user:
            rows = self.counts.tocsr()
        else:
            rows = self.counts.T.tocsr()

        t = time.time()
        for i in range(num_solve):
            counts_i = rows[i].toarray().ravel()
            CuI = sparse.diags([counts_i], [0])
            # Binary indicator: 1 wherever a count was observed.
            pu = (counts_i != 0).astype(float).reshape(1, -1)
            YTCuIY = fixed_t.dot(CuI).dot(fixed_vecs)
            YTCupu = fixed_t.dot(CuI + eye).dot(sparse.csr_matrix(pu).T)
            xu = spsolve(YTY + YTCuIY + lambda_eye, YTCupu)
            solve_vecs[i] = xu
            if i % 1000 == 0:
                print('Solved %i vecs in %d seconds' % (i, time.time() - t))
                t = time.time()

        return solve_vecs