-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.sh
More file actions
executable file
·87 lines (70 loc) · 2.53 KB
/
run.sh
File metadata and controls
executable file
·87 lines (70 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env bash
#
# Quick sanity run of the recommender pipeline: feeds an inline Python program
# to the interpreter, which trains the models on the bundled sample dataset and
# prints P@10, R@10, MAP@10 and NDCG@10.
#
# The Python code imports the `src` package and reads `data/sample` through
# relative paths, so both are resolved from the current working directory.

# Strict mode: stop on the first failing command, on any unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Avoid OpenBLAS oversubscription warnings + slowdowns
export OPENBLAS_NUM_THREADS=1
python - <<'PY'
import numpy as np
from src.data import prepare
from src.als_train import train_als
# Optional-dependency probe: use the LightFM trainer when its module imports
# cleanly, otherwise fall back to the LogisticMF trainer.
try:
    from src.lightfm_train import train_lightfm
except Exception:
    # Any failure raised while importing (not only ImportError) selects the
    # fallback trainer.
    HAVE_LIGHTFM = False
    from src.lmf_train import train_lmf
else:
    HAVE_LIGHTFM = True
from src.hybrid import score_als, score_lightfm, score_content_knn
from src.metrics import precision_at_k, recall_at_k, mapk, ndcg_at_k
def _as_len(a, n):
    """Flatten ``a`` to 1-D and force it to exactly ``n`` entries.

    A vector that already has ``n`` entries is returned as-is (after
    ``ravel``, keeping its dtype).  Otherwise a new float array is returned:
    surplus entries are dropped and missing trailing entries are filled with
    ``-inf``.
    """
    a = np.asarray(a).ravel()
    if a.shape[0] == n:
        return a
    out = np.full(n, -np.inf, dtype=float)
    m = min(n, a.shape[0])
    out[:m] = a[:m]
    return out


def blend_three(s1, s2, s3, w, n_items):
    """Return the weighted sum of three per-item score vectors.

    Args:
        s1, s2, s3: array-likes of scores; each is flattened and then
            truncated or ``-inf``-padded to ``n_items`` entries.
        w: iterable of exactly three weights ``(w1, w2, w3)``, applied to
            ``s1``, ``s2`` and ``s3`` respectively.
        n_items: length of the returned vector.

    Returns:
        A new float64 array of length ``n_items``.  An entry that had to be
        padded in a component with non-zero weight ends up non-finite
        (``-inf`` for a positive weight).

    Raises:
        ValueError: if ``w`` does not unpack into exactly three values.
    """
    w1, w2, w3 = w
    blended = np.zeros(n_items, dtype=float)
    for weight, scores in ((w1, s1), (w2, s2), (w3, s3)):
        if weight == 0:
            # A zero weight switches the component off.  Multiplying anyway
            # would turn its -inf padding into NaN (0 * -inf) and poison the
            # blended score of every padded item.
            continue
        blended = blended + weight * _as_len(scores, n_items)
    return blended
def safe_topk(scores, k):
    """Return the indices of the ``k`` largest finite scores, best first.

    Non-finite entries (NaN, +inf, -inf) are never returned.  When fewer than
    ``k`` finite entries exist, all of them are returned.

    Args:
        scores: array-like of scores; flattened to 1-D.
        k: maximum number of indices to return.

    Returns:
        Integer array of positions into the flattened ``scores``, ordered by
        descending score.  Empty when ``k <= 0`` or no score is finite.
    """
    scores = np.asarray(scores).ravel()
    idx = np.flatnonzero(np.isfinite(scores))
    k = min(k, idx.size)
    if k <= 0:
        # Covers both "nothing finite" and a non-positive request.  This also
        # keeps ``[-k:]`` below from degenerating into ``[-0:]`` (everything).
        return np.array([], dtype=int)
    finite = scores[idx]
    # Partial selection of the k best, then an exact sort of just those k.
    part = np.argpartition(finite, -k)[-k:]
    return idx[part][np.argsort(finite[part])[::-1]]
# Use the included sample dataset for a quick sanity run
R, Xc, meta = prepare("data/sample", dataset="sample")
n_items = Xc.shape[0]

# test_gt[u] is the held-out item list for user u (empty when the user has no
# interactions); recommended[u] is filled in by the scoring loop.
test_gt, recommended = [], []

# Naive per-user holdout: remove the last stored interaction (if any).
# SciPy's LIL layout keeps each row's column indices sorted, so "last" here is
# the highest item index of the row, not the most recent interaction.
R_lil = R.tolil()
for u in range(R_lil.shape[0]):
    if R_lil.rows[u]:
        hold = R_lil.rows[u][-1]
        R_lil.rows[u] = R_lil.rows[u][:-1]
        R_lil.data[u] = R_lil.data[u][:-1]
        test_gt.append([hold])
    else:
        test_gt.append([])
R_train = R_lil.tocsr()

# Fit the models only after the holdout has been carved out: training on the
# full matrix would let the held-out items leak into the models and inflate
# every metric.  The trainers receive the same sparse format that `prepare`
# returned; R_train itself stays CSR for per-row `.indices` lookups.
R_fit = R_train.asformat(R.format)
als = train_als(R_fit)
aux = train_lightfm(R_fit, Xc) if HAVE_LIGHTFM else train_lmf(R_fit)
# Score every user with all three components, blend them, hide the items the
# user already has in the training matrix, and keep the 10 best remaining.
for user in range(R_train.shape[0]):
    seen = R_train[user].indices

    als_scores = score_als(als, user, als.item_factors)
    if HAVE_LIGHTFM:
        aux_scores = score_lightfm(aux, user, item_features=Xc)
    else:
        # The fallback model is scored through the same helper as ALS, using
        # its own item factors.
        aux_scores = score_als(aux, user, aux.item_factors)
    content_scores = score_content_knn(Xc, seen)

    blended = blend_three(
        als_scores,
        aux_scores,
        content_scores,
        w=(0.5, 0.4, 0.1),
        n_items=n_items,
    )
    # -inf marks already-seen items so safe_topk never returns them.
    blended[seen] = -np.inf
    recommended.append(safe_topk(blended, 10).tolist())
# Print one "<label> <value>" line per ranking metric at cutoff 10, rounded to
# four decimals.  Each metric is computed right before its line is printed.
# NOTE(review): mapk is called with (test_gt, recommended) while the other
# metrics take (recommended, test_gt) -- confirm against src.metrics.
metric_table = (
    ("P@10", lambda: precision_at_k(recommended, test_gt, 10)),
    ("R@10", lambda: recall_at_k(recommended, test_gt, 10)),
    ("MAP@10", lambda: mapk(test_gt, recommended, 10)),
    ("NDCG@10", lambda: ndcg_at_k(recommended, test_gt, 10)),
)
for label, compute in metric_table:
    print(label, round(compute(), 4))
PY