@@ -45,6 +45,7 @@ def __init__(self,
         self.new_prob = paddle.assign(self.prob.astype("float32"))
         self.log_q = paddle.log(-(paddle.exp((-paddle.log1p(self.new_prob) * 2
                                               * n_sample)) - 1.0))
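+        # soft-label cross entropy over the concatenated [true, sampled] logits;
+        # replaces the direct F.softmax_with_cross_entropy call in forward()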
+        self.loss = nn.CrossEntropyLoss(soft_label=True)

     def sample(self, labels):
         """Random sample neg_samples
@@ -65,6 +66,7 @@ def forward(self, inputs, labels, weights, bias):
         # weights.stop_gradient = False
         embedding_dim = paddle.shape(weights)[-1]
         true_log_probs, samp_log_probs, neg_samples = self.sample(labels)
+        # print(neg_samples)
         n_sample = neg_samples.shape[0]

         b1 = paddle.shape(labels)[0]
@@ -82,33 +84,33 @@ def forward(self, inputs, labels, weights, bias):
         sample_b = all_b[-n_sample:]

         # [B, D] * [B, 1, D]
-        true_logist = paddle.matmul(
-            true_w, inputs.unsqueeze(1), transpose_y=True).squeeze(1) + true_b
+        true_logist = paddle.sum(paddle.multiply(true_w, inputs.unsqueeze(1)),
+                                 axis=-1) + true_b
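+        # broadcast multiply + sum over the embedding axis is a row-wise dot
+        # product between each true-label weight vector and its input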
+        # print(true_logist)

         sample_logist = paddle.matmul(
-            inputs.unsqueeze(1), sample_w, transpose_y=True) + sample_b
-
-        if self.subtract_log_q:
-            true_logist = true_logist - true_log_probs.unsqueeze(1)
-            sample_logist = sample_logist - samp_log_probs
+            inputs, sample_w, transpose_y=True) + sample_b
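+        # negatives are shared across the batch:
+        # [B, D] x [n_sample, D]^T -> [B, n_sample]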

         if self.remove_accidental_hits:
-            hit = (paddle.equal(labels[:, :], neg_samples)).unsqueeze(1)
+            hit = (paddle.equal(labels[:, :], neg_samples))
             padding = paddle.ones_like(sample_logist) * -1e30
             sample_logist = paddle.where(hit, padding, sample_logist)
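+            # mask "accidental hits" (sampled negatives equal to the true
+            # label) with a very large negative logit so softmax ignores them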

-        sample_logist = sample_logist.squeeze(1)
+        if self.subtract_log_q:
+            true_logist = true_logist - true_log_probs.unsqueeze(1)
+            sample_logist = sample_logist - samp_log_probs
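+        # sampled-softmax correction: subtracting each class's log expected
+        # sampling probability keeps the sampled loss approximately unbiased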
+
         out_logist = paddle.concat([true_logist, sample_logist], axis=1)
         out_label = paddle.concat(
             [
                 paddle.ones_like(true_logist) / self.num_true,
                 paddle.zeros_like(sample_logist)
             ],
             axis=1)
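+        # soft label: 1/num_true for each true column, 0 for every sampled
+        # column; it is a constant target, so keep it out of the gradient graph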
+        out_label.stop_gradient = True

-        sampled_loss = F.softmax_with_cross_entropy(
-            logits=out_logist, label=out_label, soft_label=True)
-        return sampled_loss, out_logist, out_label
+        loss = self.loss(out_logist, out_label)
+        return loss, out_logist, out_label


 class Mind_Capsual_Layer(nn.Layer):
@@ -148,6 +150,7 @@ def __init__(self,
                 name="bilinear_mapping_matrix", trainable=True),
             default_initializer=nn.initializer.Normal(
                 mean=0.0, std=self.init_std))
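+        # extra output transform; forward() now passes the interest capsules
+        # through this linear layer followed by a ReLU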
+        self.relu_layer = nn.Linear(self.output_units, self.output_units)

     def squash(self, Z):
         """squash
@@ -164,8 +167,10 @@ def sequence_mask(self, lengths, maxlen=None, dtype="bool"):
         batch_size = paddle.shape(lengths)[0]
         if maxlen is None:
             maxlen = lengths.max()
-        row_vector = paddle.arange(0, maxlen, 1).unsqueeze(0).expand(
-            shape=(batch_size, maxlen)).reshape((batch_size, -1, maxlen))
+        row_vector = paddle.arange(
+            0, maxlen,
+            1).unsqueeze(0).expand(shape=(batch_size, maxlen)).reshape(
+                (batch_size, -1, maxlen))
         lengths = lengths.unsqueeze(-1)
         mask = row_vector < lengths
         return mask.astype(dtype)
@@ -182,39 +187,50 @@ def forward(self, item_his_emb, seq_len):

         mask = self.sequence_mask(seq_len_tile, self.maxlen)
         pad = paddle.ones_like(mask, dtype="float32") * (-2**32 + 1)
-
         # S*e
         low_capsule_new = paddle.matmul(item_his_emb,
                                         self.bilinear_mapping_matrix)

-        low_capsule_new_nograd = paddle.assign(low_capsule_new)
+        low_capsule_new_tile = paddle.tile(low_capsule_new, [1, 1, self.k_max])
+        low_capsule_new_tile = paddle.reshape(
+            low_capsule_new_tile,
+            [-1, self.maxlen, self.k_max, self.output_units])
+        low_capsule_new_tile = paddle.transpose(low_capsule_new_tile,
+                                                [0, 2, 1, 3])
+        low_capsule_new_tile = paddle.reshape(
+            low_capsule_new_tile,
+            [-1, self.k_max, self.maxlen, self.output_units])
+        low_capsule_new_nograd = paddle.assign(low_capsule_new_tile)
         low_capsule_new_nograd.stop_gradient = True
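+        # the mapped behaviour sequence is tiled k_max times (one copy per
+        # interest capsule); the detached copy is used inside the routing loop
+        # so the iterations do not propagate gradients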

         B = paddle.tile(self.routing_logits,
                         [paddle.shape(item_his_emb)[0], 1, 1])
+        B.stop_gradient = True

         for i in range(self.iters - 1):
             B_mask = paddle.where(mask, B, pad)
             # print(B_mask)
             W = F.softmax(B_mask, axis=1)
+            W = paddle.unsqueeze(W, axis=2)
             high_capsule_tmp = paddle.matmul(W, low_capsule_new_nograd)
+            # print(low_capsule_new_nograd.shape)
             high_capsule = self.squash(high_capsule_tmp)
             B_delta = paddle.matmul(
-                high_capsule, low_capsule_new_nograd, transpose_y=True)
-            B += B_delta / paddle.maximum(
-                paddle.norm(
-                    B_delta, p=2, axis=-1, keepdim=True),
-                paddle.ones_like(B_delta))
+                low_capsule_new_nograd,
+                paddle.transpose(high_capsule, [0, 1, 3, 2]))
+            B_delta = paddle.reshape(
+                B_delta, shape=[-1, self.k_max, self.maxlen])
+            B += B_delta
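+            # routing update: B_delta is the agreement (dot product) between
+            # each behaviour capsule and the current interest capsule; the
+            # earlier norm-based scaling of B_delta is dropped in this version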

         B_mask = paddle.where(mask, B, pad)
         W = F.softmax(B_mask, axis=1)
-        # paddle.static.Print(W)
-        high_capsule_tmp = paddle.matmul(W, low_capsule_new)
-        # high_capsule_tmp.stop_gradient = False
-
-        high_capsule = self.squash(high_capsule_tmp)
-        # high_capsule.stop_gradient = False
+        W = paddle.unsqueeze(W, axis=2)
+        interest_capsule = paddle.matmul(W, low_capsule_new_tile)
+        interest_capsule = self.squash(interest_capsule)
+        high_capsule = paddle.reshape(interest_capsule,
+                                      [-1, self.k_max, self.output_units])
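+        # the final pass uses low_capsule_new_tile rather than the detached
+        # copy, so gradients reach the bilinear mapping matrix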

+        high_capsule = F.relu(self.relu_layer(high_capsule))
         return high_capsule, W, seq_len


@@ -246,6 +262,7 @@ def __init__(self,
                 name="item_emb",
                 initializer=nn.initializer.XavierUniform(
                     fan_in=item_count, fan_out=embedding_dim)))
+        # print(self.item_emb.weight)
         self.embedding_bias = self.create_parameter(
             shape=(item_count, ),
             is_bias=True,
@@ -267,11 +284,17 @@ def __init__(self,
     def label_aware_attention(self, keys, query):
         """label_aware_attention
         """
-        weight = paddle.sum(keys * query, axis=-1, keepdim=True)
-        weight = paddle.pow(weight, self.pow_p)  # [x, k_max, 1]
-        weight = F.softmax(weight, axis=1)
-        output = paddle.sum(keys * weight, axis=1)
-        return output, weight
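+        # same attention expressed with matmuls: score each interest capsule by
+        # its dot product with the label embedding, sharpen with pow_p, softmax
+        # over the k_max capsules, then take the weighted sum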
+        weight = paddle.matmul(keys,
+                               paddle.reshape(query, [
+                                   -1, paddle.shape(query)[-1], 1
+                               ]))  # [B, K, dim] * [B, dim, 1] == [B, K, 1]
+        weight = paddle.squeeze(weight, axis=-1)
+        weight = paddle.pow(weight, self.pow_p)  # [x, k_max]
+        weight = F.softmax(weight)  # [x, k_max]
+        weight = paddle.unsqueeze(weight, 1)  # [B, 1, k_max]
+        output = paddle.matmul(
+            weight, keys)  # [B, 1, k_max] * [B, k_max, dim] => [B, 1, dim]
+        return output.squeeze(1), weight

     def forward(self, hist_item, seqlen, labels=None):
         """forward
@@ -281,7 +304,7 @@ def forward(self, hist_item, seqlen, labels=None):
             seqlen : [B, 1]
             target : [B, 1]
         """
-
+        # print(hist_item)
         hit_item_emb = self.item_emb(hist_item)  # [B, seqlen, embed_dim]
         user_cap, cap_weights, cap_mask = self.capsual_layer(hit_item_emb,
                                                              seqlen)