Skip to content

Commit 3d41f52

Browse files
authored
Add HRDR model (#518)
* Add HRDR model * refactor code * refactor code to use save and load function
1 parent 3ef1678 commit 3d41f52

File tree

8 files changed

+655
-0
lines changed

8 files changed

+655
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ The recommender models supported by Cornac are listed below. Why don't you join
112112
| | [Causal Inference for Visual Debiasing in Visually-Aware Recommendation (CausalRec)](cornac/models/causalrec), [paper](https://arxiv.org/abs/2107.02390) | [requirements.txt](cornac/models/causalrec/requirements.txt) | [causalrec_clothing.py](examples/causalrec_clothing.py)
113113
| | [Explainable Recommendation with Comparative Constraints on Product Aspects (ComparER)](cornac/models/comparer), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441754) | N/A | [PreferredAI/ComparER](https://github.com/PreferredAI/ComparER)
114114
| 2020 | [Adversarial Training Towards Robust Multimedia Recommender System (AMR)](cornac/models/amr), [paper](https://ieeexplore.ieee.org/document/8618394) | [requirements.txt](cornac/models/amr/requirements.txt) | [amr_clothing.py](examples/amr_clothing.py)
115+
| | [Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR)](cornac/models/hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | [requirements.txt](cornac/models/hrdr/requirements.txt) | [hrdr_example.py](examples/hrdr_example.py)
115116
| 2019 | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | N/A | [ease_movielens.py](examples/ease_movielens.py)
116117
| 2018 | [Collaborative Context Poisson Factorization (C2PF)](cornac/models/c2pf), [paper](https://www.ijcai.org/proceedings/2018/0370.pdf) | N/A | [c2pf_exp.py](examples/c2pf_example.py)
117118
| | [Multi-Task Explainable Recommendation (MTER)](cornac/models/mter), [paper](https://arxiv.org/pdf/1806.03568.pdf) | N/A | [mter_exp.py](examples/mter_example.py)

cornac/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from .global_avg import GlobalAvg
3737
from .hft import HFT
3838
from .hpf import HPF
39+
from .hrdr import HRDR
3940
from .ibpr import IBPR
4041
from .knn import ItemKNN
4142
from .knn import UserKNN

cornac/models/hrdr/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .recom_hrdr import HRDR

cornac/models/hrdr/hrdr.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
import numpy as np
2+
import tensorflow as tf
3+
from tensorflow import keras
4+
from tensorflow.keras import layers, initializers
5+
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
6+
7+
from ...utils import get_rng
8+
from ...utils.init_utils import uniform
9+
from ..narre.narre import TextProcessor, AddGlobalBias
10+
11+
12+
def get_data(batch_ids, train_set, max_text_length, by="user", max_num_review=32):
    """Collect padded reviews, review counts and dense rating rows for a batch.

    Parameters
    ----------
    batch_ids : iterable of int
        Indices of the users (``by="user"``) or items (any other value of
        ``by``) to gather data for.
    train_set : object
        Training data exposing ``review_text`` (with ``user_review``,
        ``item_review`` and ``batch_seq``), ``user_data``, ``item_data``,
        ``num_users`` and ``num_items``.
    max_text_length : int
        Forwarded to ``review_text.batch_seq`` as ``max_length``.
    by : str
        ``"user"`` selects the user-side groups; anything else selects the
        item-side groups.
    max_num_review : int or None
        Maximum number of reviews kept per entity, and the ``maxlen`` handed
        to ``pad_sequences``. ``None`` disables the per-entity truncation.

    Returns
    -------
    tuple
        ``(batch_reviews, batch_num_reviews, batch_ratings)`` where
        ``batch_reviews`` is the post-padded output of ``pad_sequences``,
        ``batch_num_reviews`` is an ``int32`` array of per-entity review
        counts and ``batch_ratings`` is a ``float32`` array with one row per
        entry of ``batch_ids`` and one column per counterpart entity.
    """
    per_user = by == "user"
    review_text = train_set.review_text

    if per_user:
        review_group = review_text.user_review
    else:
        review_group = review_text.item_review

    # Gather the encoded reviews of every entity in the batch.
    batch_reviews = []
    review_counts = []
    for idx in batch_ids:
        # presumably review_group[idx] maps counterpart index -> review index;
        # only the values are used here — verify against the data modality.
        selected = []
        for _, review_idx in review_group[idx].items():
            # len(selected) equals the number of entries seen so far, so this
            # stops after ``max_num_review`` reviews; a ``None`` limit never
            # compares equal to an int and therefore never truncates.
            if len(selected) == max_num_review:
                break
            selected.append(review_idx)
        encoded = review_text.batch_seq(selected, max_length=max_text_length)
        batch_reviews.append(encoded)
        review_counts.append(len(encoded))

    batch_reviews = pad_sequences(batch_reviews, maxlen=max_num_review, padding="post")
    batch_num_reviews = np.array(review_counts).astype(np.int32)

    # Dense rating rows: users are described over items and vice versa.
    if per_user:
        batch_ratings = np.zeros((len(batch_ids), train_set.num_items), dtype=np.float32)
        rating_group = train_set.user_data
    else:
        batch_ratings = np.zeros((len(batch_ids), train_set.num_users), dtype=np.float32)
        rating_group = train_set.item_data

    for row, idx in enumerate(batch_ids):
        counterpart_ids, values = rating_group[idx]
        for col, value in zip(counterpart_ids, values):
            batch_ratings[row, col] = value

    return batch_reviews, batch_num_reviews, batch_ratings
43+
44+
class Model(keras.Model):
    """Keras network predicting a rating from ratings, reviews and IDs.

    For each side (user and item) three vectors are concatenated: the output
    of an MLP over the dense rating row, an attention-pooled and projected
    encoding of the reviews, and an ID embedding. The two concatenated vectors
    are multiplied element-wise, reduced to a scalar by ``W1`` and summed with
    the user bias, the item bias and the output of ``AddGlobalBias``.

    Layer creation order, layer names and attribute names are significant for
    weight naming and seeded initialisation; keep them stable when editing.
    """

    def __init__(self, n_users, n_items, n_vocab, global_mean, embedding_matrix,
                 n_factors=32, embedding_size=100, id_embedding_size=32,
                 attention_size=16, kernel_sizes=[3], n_filters=64,
                 n_user_mlp_factors=128, n_item_mlp_factors=128,
                 dropout_rate=0.5, max_text_length=50):
        super().__init__()

        def rating_mlp(hidden_units, input_dim):
            # Three ReLU layers ending in ``n_filters`` units, then batch norm.
            return keras.models.Sequential([
                layers.Dense(hidden_units, input_dim=input_dim, activation="relu"),
                layers.Dense(hidden_units // 2, activation="relu"),
                layers.Dense(n_filters, activation="relu"),
                layers.BatchNormalization(),
            ])

        def attention_scorer():
            # Two-layer scorer producing one unnormalised score per review.
            return keras.models.Sequential([
                layers.Dense(attention_size, activation="relu", use_bias=True),
                layers.Dense(1, activation=None, use_bias=True)
            ])

        # Word embeddings for review tokens, one table per side.
        self.l_user_review_embedding = layers.Embedding(
            n_vocab,
            embedding_size,
            embeddings_initializer=embedding_matrix,
            mask_zero=True,
            name="user_review_embedding",
        )
        self.l_item_review_embedding = layers.Embedding(
            n_vocab,
            embedding_size,
            embeddings_initializer=embedding_matrix,
            mask_zero=True,
            name="item_review_embedding",
        )

        # ID embeddings and scalar biases.
        self.l_user_embedding = layers.Embedding(
            n_users,
            id_embedding_size,
            embeddings_initializer="uniform",
            name="user_embedding",
        )
        self.l_item_embedding = layers.Embedding(
            n_items,
            id_embedding_size,
            embeddings_initializer="uniform",
            name="item_embedding",
        )
        self.user_bias = layers.Embedding(
            n_users,
            1,
            embeddings_initializer=tf.initializers.Constant(0.1),
            name="user_bias",
        )
        self.item_bias = layers.Embedding(
            n_items,
            1,
            embeddings_initializer=tf.initializers.Constant(0.1),
            name="item_bias",
        )

        # Review encoders (imported TextProcessor).
        text_processor_options = dict(
            filters=n_filters,
            kernel_sizes=kernel_sizes,
            dropout_rate=dropout_rate,
        )
        self.user_text_processor = TextProcessor(
            max_text_length, name="user_text_processor", **text_processor_options
        )
        self.item_text_processor = TextProcessor(
            max_text_length, name="item_text_processor", **text_processor_options
        )

        # Rating encoders: a user is described over items and vice versa.
        self.l_user_mlp = rating_mlp(n_user_mlp_factors, n_items)
        self.l_item_mlp = rating_mlp(n_item_mlp_factors, n_users)

        # Review-level attention; softmax runs over axis 1 of the scores.
        self.a_user = attention_scorer()
        self.user_attention = layers.Softmax(axis=1, name="user_attention")
        self.a_item = attention_scorer()
        self.item_attention = layers.Softmax(axis=1, name="item_attention")

        # Dropout and projection of the pooled review vectors.
        self.ou_dropout = layers.Dropout(rate=dropout_rate)
        self.oi_dropout = layers.Dropout(rate=dropout_rate)
        self.ou = layers.Dense(n_factors, use_bias=True, name="ou")
        self.oi = layers.Dense(n_factors, use_bias=True, name="oi")

        # Final scalar projection and global bias.
        self.W1 = layers.Dense(1, activation=None, use_bias=False, name="W1")
        self.add_global_bias = AddGlobalBias(init_value=global_mean, name="global_bias")

    def call(self, inputs, training=False):
        """Compute the prediction for a batch.

        ``inputs`` is an 8-tuple, in order: user ids, item ids, user rating
        rows, user reviews, user review counts, item rating rows, item
        reviews, item review counts.
        """
        (
            user_ids,
            item_ids,
            user_ratings,
            user_reviews,
            user_review_counts,
            item_ratings,
            item_reviews,
            item_review_counts,
        ) = inputs

        # Encode reviews, then ratings, for both sides.
        user_review_h = self.user_text_processor(
            self.l_user_review_embedding(user_reviews), training=training
        )
        item_review_h = self.item_text_processor(
            self.l_item_review_embedding(item_reviews), training=training
        )
        user_rating_h = self.l_user_mlp(user_ratings)
        item_rating_h = self.l_item_mlp(item_ratings)

        # User-side attention: reviews are scored after being modulated by the
        # rating encoding; positions beyond the review count are masked out.
        user_query = tf.multiply(user_review_h, tf.expand_dims(user_rating_h, 1))
        user_scores = self.a_user(user_query)
        user_valid = tf.sequence_mask(
            tf.reshape(user_review_counts, [-1]), maxlen=user_reviews.shape[1]
        )
        user_mask = tf.expand_dims(user_valid, -1)
        user_weights = self.user_attention(user_scores, user_mask)

        # Item-side attention, same construction.
        item_query = tf.multiply(item_review_h, tf.expand_dims(item_rating_h, 1))
        item_scores = self.a_item(item_query)
        item_valid = tf.sequence_mask(
            tf.reshape(item_review_counts, [-1]), maxlen=item_reviews.shape[1]
        )
        item_mask = tf.expand_dims(item_valid, -1)
        item_weights = self.item_attention(item_scores, item_mask)

        # Attention-weighted sum over reviews, dropout in training, projection.
        user_pooled = tf.reduce_sum(tf.multiply(user_weights, user_review_h), 1)
        if training:
            user_pooled = self.ou_dropout(user_pooled, training=training)
        user_pooled = self.ou(user_pooled)

        item_pooled = tf.reduce_sum(tf.multiply(item_weights, item_review_h), 1)
        if training:
            item_pooled = self.oi_dropout(item_pooled, training=training)
        item_pooled = self.oi(item_pooled)

        # Final representations: rating encoding, review encoding, ID embedding.
        user_id_h = self.l_user_embedding(user_ids)
        user_repr = tf.concat([user_rating_h, user_pooled, user_id_h], axis=-1)
        item_id_h = self.l_item_embedding(item_ids)
        item_repr = tf.concat([item_rating_h, item_pooled, item_id_h], axis=-1)

        interaction = tf.multiply(user_repr, item_repr)
        score_terms = [
            self.W1(interaction),
            self.user_bias(user_ids),
            self.item_bias(item_ids),
        ]
        return self.add_global_bias(tf.add_n(score_terms))
140+
141+
class HRDRModel:
    """Builds the HRDR Keras network and exports its learned factors.

    The constructor stores the hyper-parameters, prepares the initial word
    embedding matrix (optionally filled from pretrained vectors) and
    instantiates ``Model`` as ``self.graph``. ``get_weights`` runs the network
    in inference mode over all users and items and returns the resulting
    matrices as NumPy arrays.
    """

    def __init__(self, n_users, n_items, vocab, global_mean,
                 n_factors=32, embedding_size=100, id_embedding_size=32,
                 attention_size=16, kernel_sizes=[3], n_filters=64,
                 n_user_mlp_factors=128, n_item_mlp_factors=128,
                 dropout_rate=0.5, max_text_length=50, max_num_review=32,
                 pretrained_word_embeddings=None, verbose=False, seed=None):
        """Store hyper-parameters and build the underlying Keras model.

        Parameters
        ----------
        n_users, n_items : int
            Number of users and items.
        vocab : object
            Vocabulary exposing ``size`` and ``tok2idx``.
        global_mean : float
            Initial value handed to the model's global-bias layer.
        pretrained_word_embeddings : mapping or None
            Optional ``word -> vector`` lookup (accessed through ``.get``)
            used to overwrite rows of the initial embedding matrix.
        verbose : bool
            When true, print the number of vocabulary words missing from
            ``pretrained_word_embeddings``.
        seed : int or None
            Seed for the embedding initialisation and, when not ``None``,
            for TensorFlow's global random state.

        The remaining arguments are forwarded unchanged to ``Model``. The
        default ``kernel_sizes`` list is shared between calls; it is stored
        and forwarded but not mutated in this class.
        """
        self.n_users = n_users
        self.n_items = n_items
        self.n_vocab = vocab.size
        self.global_mean = global_mean
        self.n_factors = n_factors
        self.embedding_size = embedding_size
        self.id_embedding_size = id_embedding_size
        self.attention_size = attention_size
        self.kernel_sizes = kernel_sizes
        self.n_filters = n_filters
        self.n_user_mlp_factors = n_user_mlp_factors
        self.n_item_mlp_factors = n_item_mlp_factors
        self.dropout_rate = dropout_rate
        self.max_text_length = max_text_length
        self.max_num_review = max_num_review
        self.verbose = verbose

        # ``self.rng`` is read unconditionally below, so it must always be
        # assigned; it used to be set only for a non-None seed, which made the
        # default ``seed=None`` fail with AttributeError.
        # NOTE(review): assumes get_rng(None) returns a usable generator — confirm.
        self.rng = get_rng(seed)
        if seed is not None:
            tf.random.set_seed(seed)

        embedding_matrix = uniform(
            shape=(self.n_vocab, self.embedding_size),
            low=-0.5,
            high=0.5,
            random_state=self.rng,
        )
        # Zero the first four rows (presumably reserved special tokens — TODO
        # confirm). A scalar assignment also works when fewer than four rows exist.
        embedding_matrix[:4, :] = 0.0

        if pretrained_word_embeddings is not None:
            oov_count = 0
            for word, idx in vocab.tok2idx.items():
                embedding_vector = pretrained_word_embeddings.get(word)
                if embedding_vector is not None:
                    embedding_matrix[idx] = embedding_vector
                else:
                    oov_count += 1
            if self.verbose:
                print("Number of OOV words: %d" % oov_count)

        embedding_matrix = initializers.Constant(embedding_matrix)
        self.graph = Model(
            self.n_users, self.n_items, self.n_vocab, self.global_mean, embedding_matrix,
            self.n_factors, self.embedding_size, self.id_embedding_size,
            self.attention_size, self.kernel_sizes, self.n_filters,
            self.n_user_mlp_factors, self.n_item_mlp_factors,
            self.dropout_rate, self.max_text_length
        )

    def _encode_batch(self, batch_ids, train_set, by):
        """Return ``(representation, attention)`` tensors for one batch.

        ``by`` is ``"user"`` or ``"item"`` and selects which side's layers of
        ``self.graph`` are applied. Layers run in inference mode and the
        dropout layers are skipped.
        """
        graph = self.graph
        if by == "user":
            review_embedding = graph.l_user_review_embedding
            text_processor = graph.user_text_processor
            rating_mlp = graph.l_user_mlp
            scorer = graph.a_user
            attention_layer = graph.user_attention
            projection = graph.ou
            id_embedding = graph.l_user_embedding
        else:
            review_embedding = graph.l_item_review_embedding
            text_processor = graph.item_text_processor
            rating_mlp = graph.l_item_mlp
            scorer = graph.a_item
            attention_layer = graph.item_attention
            projection = graph.oi
            id_embedding = graph.l_item_embedding

        reviews, num_reviews, ratings = get_data(
            batch_ids,
            train_set,
            self.max_text_length,
            by=by,
            max_num_review=self.max_num_review,
        )
        review_h = text_processor(review_embedding(reviews), training=False)
        rating_h = rating_mlp(ratings)
        scores = scorer(tf.multiply(review_h, tf.expand_dims(rating_h, 1)))
        # Mask out padded review slots beyond each entity's review count.
        mask = tf.expand_dims(
            tf.sequence_mask(tf.reshape(num_reviews, [-1]), maxlen=reviews.shape[1]),
            -1,
        )
        attention = attention_layer(scores, mask)
        pooled = projection(tf.reduce_sum(tf.multiply(attention, review_h), 1))
        representation = tf.concat(
            [rating_h, pooled, id_embedding(batch_ids)], axis=-1
        )
        return representation, attention

    def get_weights(self, train_set, batch_size=64):
        """Export the learned factors as NumPy arrays.

        Parameters
        ----------
        train_set : object
            Training data providing ``user_iter``/``item_iter`` and the
            attributes read by ``get_data``.
        batch_size : int
            Number of users/items processed per forward pass.

        Returns
        -------
        tuple
            ``(P, Q, W1, bu, bi, mu, A)``: user and item representation
            matrices of width ``n_filters + n_factors + id_embedding_size``,
            the ``W1`` kernel, the user and item bias tables, the first
            entry of the global-bias weights, and the item attention weights
            of shape ``(n_items, max_num_review)``.
        """
        latent_dim = self.n_filters + self.n_factors + self.id_embedding_size
        P = np.zeros((self.n_users, latent_dim))
        Q = np.zeros((self.n_items, latent_dim))
        A = np.zeros((self.n_items, self.max_num_review))

        for batch_users in train_set.user_iter(batch_size, shuffle=False):
            pu, _ = self._encode_batch(batch_users, train_set, by="user")
            P[batch_users] = pu.numpy()

        for batch_items in train_set.item_iter(batch_size, shuffle=False):
            qi, item_attention = self._encode_batch(batch_items, train_set, by="item")
            Q[batch_items] = qi.numpy()
            # Drop the trailing singleton axis of the attention weights.
            A[batch_items, :item_attention.shape[1]] = item_attention.numpy().reshape(
                item_attention.shape[:2]
            )

        W1 = self.graph.W1.get_weights()[0]
        bu = self.graph.user_bias.get_weights()[0]
        bi = self.graph.item_bias.get_weights()[0]
        mu = self.graph.add_global_bias.get_weights()[0][0]
        return P, Q, W1, bu, bi, mu, A

0 commit comments

Comments
 (0)