class Mind_SampledSoftmaxLoss_Layer(nn.Layer):
    """SampledSoftmaxLoss with LogUniformSampler
    """
-
    def __init__(self,
                 num_classes,
                 n_sample,
@@ -45,6 +44,7 @@ def __init__(self,
        self.new_prob = paddle.assign(self.prob.astype("float32"))
        self.log_q = paddle.log(-(paddle.exp((-paddle.log1p(self.new_prob) * 2
                                              * n_sample)) - 1.0))
+        self.loss = nn.CrossEntropyLoss(soft_label=True)

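As a side note on the hunk above: `log_q` is the sampler's log inclusion probability, which the `subtract_log_q` branch in `forward` later removes from the logits. A minimal NumPy sketch of that correction, assuming the LogUniformSampler draws classes from the usual Zipfian distribution (the exact definition of `self.prob` is outside this hunk):

import numpy as np

num_classes, n_sample = 1000, 10
classes = np.arange(num_classes)
# Assumed log-uniform (Zipfian) sampling probability per class id.
prob = np.log((classes + 2.0) / (classes + 1.0)) / np.log(num_classes + 1.0)
# Mirrors the log_q expression above: log(1 - (1 + prob) ** (-2 * n_sample)),
# roughly the log-probability that a class shows up among the drawn negatives.
log_q = np.log(1.0 - (1.0 + prob) ** (-2.0 * n_sample))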
    def sample(self, labels):
        """Random sample neg_samples
@@ -65,6 +65,7 @@ def forward(self, inputs, labels, weights, bias):
        # weights.stop_gradient = False
        embedding_dim = paddle.shape(weights)[-1]
        true_log_probs, samp_log_probs, neg_samples = self.sample(labels)
+        # print(neg_samples)
        n_sample = neg_samples.shape[0]

        b1 = paddle.shape(labels)[0]
@@ -82,39 +83,38 @@ def forward(self, inputs, labels, weights, bias):
        sample_b = all_b[-n_sample:]

        # [B, D] * [B, 1, D]
-        true_logist = paddle.matmul(
-            true_w, inputs.unsqueeze(1), transpose_y=True).squeeze(1) + true_b
-
+        true_logist = paddle.sum(paddle.multiply(
+            true_w, inputs.unsqueeze(1)), axis=-1) + true_b
+        # print(true_logist)
+
        sample_logist = paddle.matmul(
-            inputs.unsqueeze(1), sample_w, transpose_y=True) + sample_b
+            inputs, sample_w, transpose_y=True) + sample_b
+
+        if self.remove_accidental_hits:
+            hit = (paddle.equal(labels[:, :], neg_samples))
+            padding = paddle.ones_like(sample_logist) * -1e30
+            sample_logist = paddle.where(hit, padding, sample_logist)

        if self.subtract_log_q:
            true_logist = true_logist - true_log_probs.unsqueeze(1)
            sample_logist = sample_logist - samp_log_probs

-        if self.remove_accidental_hits:
-            hit = (paddle.equal(labels[:, :], neg_samples)).unsqueeze(1)
-            padding = paddle.ones_like(sample_logist) * -1e30
-            sample_logist = paddle.where(hit, padding, sample_logist)
-
-        sample_logist = sample_logist.squeeze(1)
        out_logist = paddle.concat([true_logist, sample_logist], axis=1)
        out_label = paddle.concat(
            [
                paddle.ones_like(true_logist) / self.num_true,
                paddle.zeros_like(sample_logist)
            ],
            axis=1)
+        out_label.stop_gradient = True

-        sampled_loss = F.softmax_with_cross_entropy(
-            logits=out_logist, label=out_label, soft_label=True)
-        return sampled_loss, out_logist, out_label
+        loss = self.loss(out_logist, out_label)
+        return loss, out_logist, out_label

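For orientation, a minimal self-contained sketch of the rewritten loss path above, under assumed sizes (B=2, D=4, n_sample=3) and with the bias terms, accidental-hit masking, and log_q correction omitted: the true logit comes from an elementwise product and sum, the sampled logits from a single matmul, and both feed a soft-label cross-entropy.

import paddle
import paddle.nn as nn

B, D, n_sample = 2, 4, 3
inputs = paddle.rand([B, D])             # user interest vector
true_w = paddle.rand([B, 1, D])          # embedding of the positive item
sample_w = paddle.rand([n_sample, D])    # embeddings of the shared negatives

true_logit = paddle.sum(paddle.multiply(true_w, inputs.unsqueeze(1)), axis=-1)  # [B, 1]
sample_logit = paddle.matmul(inputs, sample_w, transpose_y=True)                # [B, n_sample]

logits = paddle.concat([true_logit, sample_logit], axis=1)                      # [B, 1 + n_sample]
soft_label = paddle.concat(
    [paddle.ones_like(true_logit), paddle.zeros_like(sample_logit)], axis=1)
loss = nn.CrossEntropyLoss(soft_label=True)(logits, soft_label)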
class Mind_Capsual_Layer(nn.Layer):
    """Mind_Capsual_Layer
    """
-
    def __init__(self,
                 input_units,
                 output_units,
@@ -148,6 +148,7 @@ def __init__(self,
148148 name = "bilinear_mapping_matrix" , trainable = True ),
149149 default_initializer = nn .initializer .Normal (
150150 mean = 0.0 , std = self .init_std ))
151+ self .relu_layer = nn .Linear (self .output_units , self .output_units )
151152
152153 def squash (self , Z ):
153154 """squash
@@ -182,39 +183,47 @@ def forward(self, item_his_emb, seq_len):

        mask = self.sequence_mask(seq_len_tile, self.maxlen)
        pad = paddle.ones_like(mask, dtype="float32") * (-2 ** 32 + 1)
-
        # S*e
        low_capsule_new = paddle.matmul(item_his_emb,
                                        self.bilinear_mapping_matrix)

-        low_capsule_new_nograd = paddle.assign(low_capsule_new)
+        low_capsule_new_tile = paddle.tile(low_capsule_new, [1, 1, self.k_max])
+        low_capsule_new_tile = paddle.reshape(
+            low_capsule_new_tile, [-1, self.maxlen, self.k_max, self.output_units])
+        low_capsule_new_tile = paddle.transpose(
+            low_capsule_new_tile, [0, 2, 1, 3])
+        low_capsule_new_tile = paddle.reshape(
+            low_capsule_new_tile, [-1, self.k_max, self.maxlen, self.output_units])
+        low_capsule_new_nograd = paddle.assign(low_capsule_new_tile)
        low_capsule_new_nograd.stop_gradient = True

        B = paddle.tile(self.routing_logits,
                        [paddle.shape(item_his_emb)[0], 1, 1])
+        B.stop_gradient = True

        for i in range(self.iters - 1):
            B_mask = paddle.where(mask, B, pad)
            # print(B_mask)
            W = F.softmax(B_mask, axis=1)
+            W = paddle.unsqueeze(W, axis=2)
            high_capsule_tmp = paddle.matmul(W, low_capsule_new_nograd)
+            # print(low_capsule_new_nograd.shape)
            high_capsule = self.squash(high_capsule_tmp)
-            B_delta = paddle.matmul(
-                high_capsule, low_capsule_new_nograd, transpose_y=True)
-            B += B_delta / paddle.maximum(
-                paddle.norm(
-                    B_delta, p=2, axis=-1, keepdim=True),
-                paddle.ones_like(B_delta))
+            B_delta = paddle.matmul(low_capsule_new_nograd,
+                                    paddle.transpose(high_capsule, [0, 1, 3, 2]))
+            B_delta = paddle.reshape(
+                B_delta, shape=[-1, self.k_max, self.maxlen])
+            B += B_delta

        B_mask = paddle.where(mask, B, pad)
        W = F.softmax(B_mask, axis=1)
-        # paddle.static.Print(W)
-        high_capsule_tmp = paddle.matmul(W, low_capsule_new)
-        # high_capsule_tmp.stop_gradient = False
-
-        high_capsule = self.squash(high_capsule_tmp)
-        # high_capsule.stop_gradient = False
+        W = paddle.unsqueeze(W, axis=2)
+        interest_capsule = paddle.matmul(W, low_capsule_new_tile)
+        interest_capsule = self.squash(interest_capsule)
+        high_capsule = paddle.reshape(
+            interest_capsule, [-1, self.k_max, self.output_units])

+        high_capsule = F.relu(self.relu_layer(high_capsule))
        return high_capsule, W, seq_len

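A shape walk-through of one routing iteration in the rewritten forward above, under assumed sizes (bsz=2, maxlen=5, k_max=3, dim=8) and with the squash nonlinearity and sequence masking left out:

import paddle
import paddle.nn.functional as F

bsz, maxlen, k_max, dim = 2, 5, 3, 8
low = paddle.rand([bsz, k_max, maxlen, dim])    # plays the role of low_capsule_new_nograd
routing_B = paddle.rand([bsz, k_max, maxlen])   # routing logits after tiling

W = F.softmax(routing_B, axis=1)                # [bsz, k_max, maxlen]
W = paddle.unsqueeze(W, axis=2)                 # [bsz, k_max, 1, maxlen]
high = paddle.matmul(W, low)                    # [bsz, k_max, 1, dim]
delta = paddle.matmul(low, paddle.transpose(high, [0, 1, 3, 2]))  # [bsz, k_max, maxlen, 1]
routing_B = routing_B + paddle.reshape(delta, [-1, k_max, maxlen])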
@@ -246,6 +255,7 @@ def __init__(self,
246255 name = "item_emb" ,
247256 initializer = nn .initializer .XavierUniform (
248257 fan_in = item_count , fan_out = embedding_dim )))
258+ # print(self.item_emb.weight)
249259 self .embedding_bias = self .create_parameter (
250260 shape = (item_count , ),
251261 is_bias = True ,
@@ -267,11 +277,13 @@ def __init__(self,
    def label_aware_attention(self, keys, query):
        """label_aware_attention
        """
-        weight = paddle.sum(keys * query, axis=-1, keepdim=True)
-        weight = paddle.pow(weight, self.pow_p)  # [B, k_max, 1]
-        weight = F.softmax(weight, axis=1)
-        output = paddle.sum(keys * weight, axis=1)
-        return output, weight
+        weight = paddle.matmul(keys, paddle.reshape(
+            query, [-1, paddle.shape(query)[-1], 1]))  # [B, k_max, dim] * [B, dim, 1] => [B, k_max, 1]
+        weight = paddle.squeeze(weight, axis=-1)
+        weight = paddle.pow(weight, self.pow_p)  # [B, k_max]
+        weight = F.softmax(weight)  # [B, k_max]
+        weight = paddle.unsqueeze(weight, 1)  # [B, 1, k_max]
+        output = paddle.matmul(weight, keys)  # [B, 1, k_max] * [B, k_max, dim] => [B, 1, dim]
+        return output.squeeze(1), weight

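A minimal sketch of the new label_aware_attention above, with assumed sizes (bsz=2, k_max=3, dim=8) and pow_p=1.0: dot-product scores between the interest capsules and the target item embedding are sharpened by a power, normalized with a softmax, and used to pool the capsules.

import paddle
import paddle.nn.functional as F

bsz, k_max, dim, pow_p = 2, 3, 8, 1.0
keys = paddle.rand([bsz, k_max, dim])   # interest capsules
query = paddle.rand([bsz, dim])         # target item embedding

w = paddle.matmul(keys, query.unsqueeze(-1)).squeeze(-1)   # [bsz, k_max]
w = F.softmax(paddle.pow(w, pow_p))                        # sharpen, then normalize
out = paddle.matmul(w.unsqueeze(1), keys).squeeze(1)       # [bsz, dim]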
    def forward(self, hist_item, seqlen, labels=None):
        """forward
@@ -281,7 +293,7 @@ def forward(self, hist_item, seqlen, labels=None):
            seqlen : [B, 1]
            target : [B, 1]
        """
-
+        # print(hist_item)
        hit_item_emb = self.item_emb(hist_item)  # [B, seqlen, embed_dim]
        user_cap, cap_weights, cap_mask = self.capsual_layer(hit_item_emb,
                                                             seqlen)