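Update ESCM2: rename esmm paths to escm in configs; rework IPW/DR counterfactual losses (clip IPS, drop unused ctr_num from DR, reduce with mean)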

wangzhen38 · wangzhen38 · commit 02bc4f576a98 · 2022-05-31T13:03:07.000Z
diff --git a/models/multitask/escm2/config.yaml b/models/multitask/escm2/config.yaml
@@ -22,12 +22,12 @@ runner:
   train_batch_size: 2
   epochs: 3
   print_interval: 2
-  #model_init_path: "output_model_esmm/2" # init model
-  model_save_path: "output_model_esmm"
+  #model_init_path: "output_model_escm/2" # init model
+  model_save_path: "output_model_escm"
   test_data_dir: "data/train"
   infer_batch_size: 2
-  infer_reader_path: "esmm_reader" # importlib format
-  infer_load_path: "output_model_esmm"
+  infer_reader_path: "escm_reader" # importlib format
+  infer_load_path: "output_model_escm"
   infer_start_epoch: 0
   infer_end_epoch: 3
   counterfact_mode: "DR"
diff --git a/models/multitask/escm2/config_bigdata.yaml b/models/multitask/escm2/config_bigdata.yaml
@@ -23,11 +23,11 @@ runner:
   epochs: 10
   print_interval: 10
   #model_init_path: "output_model/0" # init model
-  model_save_path: "output_model_esmm_all"
+  model_save_path: "output_model_escm_all"
   test_data_dir: "../../../datasets/ali-ccp/test_data"
   infer_batch_size: 1024
-  infer_reader_path: "esmm_reader" # importlib format
-  infer_load_path: "output_model_esmm_all"
+  infer_reader_path: "escm_reader" # importlib format
+  infer_load_path: "output_model_escm_all"
   infer_start_epoch: 0
   infer_end_epoch: 10
   counterfact_mode: "DR"
diff --git a/models/multitask/escm2/dygraph_model.py b/models/multitask/escm2/dygraph_model.py
@@ -79,7 +79,7 @@ def create_loss(self, ctr_out_one, ctr_clk, ctcvr_prop_one, ctcvr_buy,
         ctr_num = paddle.sum(ctr_clk, axis=0)
         O = paddle.cast(ctr_clk, 'float32')
         if self.counterfact_mode == "DR":
-            loss_cvr = self.counterfact_dr(loss_cvr, ctr_num, O, ctr_out_one,
+            loss_cvr = self.counterfact_dr(loss_cvr, O, ctr_out_one,
                                            out_list[6])
         else:
             loss_cvr = self.counterfact_ipw(loss_cvr, ctr_num, O, ctr_out_one)
@@ -109,39 +109,43 @@ def counterfact_ipw(self, loss_cvr, ctr_num, O, ctr_out_one):
         PS = paddle.multiply(
             ctr_out_one, paddle.cast(
                 ctr_num, dtype="float32"))
-        PS = paddle.multiply(PS, paddle.cast(ctr_num, dtype="float32"))
         min_v = paddle.full_like(PS, 0.000001)
         PS = paddle.maximum(PS, min_v)
         IPS = paddle.reciprocal(PS)
-        #batch_shape = paddle.full_like(O, 1)
-        #batch_size = paddle.sum(paddle.cast(batch_shape, dtype="float32"), axis=0)
+        batch_shape = paddle.full_like(O, 1)
+        batch_size = paddle.sum(paddle.cast(
+            batch_shape, dtype="float32"),
+                                axis=0)
         #TODO this shoud be a hyparameter
         IPS = paddle.clip(IPS, min=-15, max=15)  #online trick 
-        #IPS = paddle.multiply(IPS, batch_size)
+        IPS = paddle.multiply(IPS, batch_size)
         IPS.stop_gradient = True
         loss_cvr = paddle.multiply(loss_cvr, IPS)
         loss_cvr = paddle.multiply(loss_cvr, O)
-        return loss_cvr
+        return paddle.mean(loss_cvr)
 
-    def counterfact_dr(self, loss_cvr, ctr_num, O, ctr_out_one, imp_out):
+    def counterfact_dr(self, loss_cvr, O, ctr_out_one, imp_out):
         #dr error part
-        loss_error_first = imp_out
         e = paddle.subtract(loss_cvr, imp_out)
 
         min_v = paddle.full_like(ctr_out_one, 0.000001)
         ctr_out_one = paddle.maximum(ctr_out_one, min_v)
+        IPS = paddle.divide(paddle.cast(O, dtype="float32"), ctr_out_one)
 
-        loss_error_second = paddle.multiply(O, e)
-        loss_error_second = paddle.divide(loss_error_second, ctr_out_one)
+        IPS = paddle.clip(IPS, min=-15, max=15)  #online trick 
+        IPS.stop_gradient = True
 
-        loss_error = loss_error_first + loss_error_second
+        loss_error_second = paddle.multiply(e, IPS)
+
+        loss_error = imp_out + loss_error_second
 
         #dr imp part
         loss_imp = paddle.square(e)
-        loss_imp = paddle.multiply(loss_imp, O)
-        loss_imp = paddle.divide(loss_imp, ctr_out_one)
+        loss_imp = paddle.multiply(loss_imp, IPS)
+
+        loss_dr = loss_error + loss_imp
 
-        return loss_error + loss_imp
+        return paddle.mean(loss_dr)
 
     # construct train forward phase  
     def train_forward(self, dy_model, metrics_list, batch_data, config):
diff --git a/models/multitask/escm2/static_model.py b/models/multitask/escm2/static_model.py
@@ -65,39 +65,43 @@ def counterfact_ipw(self, loss_cvr, ctr_num, O, ctr_out_one):
         PS = paddle.multiply(
             ctr_out_one, paddle.cast(
                 ctr_num, dtype="float32"))
-        PS = paddle.multiply(PS, paddle.cast(ctr_num, dtype="float32"))
         min_v = paddle.full_like(PS, 0.000001)
         PS = paddle.maximum(PS, min_v)
         IPS = paddle.reciprocal(PS)
-        #batch_shape = paddle.full_like(O, 1)
-        #batch_size = paddle.sum(paddle.cast(batch_shape, dtype="float32"), axis=0)
+        batch_shape = paddle.full_like(O, 1)
+        batch_size = paddle.sum(paddle.cast(
+            batch_shape, dtype="float32"),
+                                axis=0)
         #TODO this shoud be a hyparameter
         IPS = paddle.clip(IPS, min=-15, max=15)  #online trick 
-        #IPS = paddle.multiply(IPS, batch_size)
+        IPS = paddle.multiply(IPS, batch_size)
         IPS.stop_gradient = True
         loss_cvr = paddle.multiply(loss_cvr, IPS)
         loss_cvr = paddle.multiply(loss_cvr, O)
-        return loss_cvr
+        return paddle.mean(loss_cvr)
 
-    def counterfact_dr(self, loss_cvr, ctr_num, O, ctr_out_one, imp_out):
+    def counterfact_dr(self, loss_cvr, O, ctr_out_one, imp_out):
         #dr error part
-        loss_error_first = imp_out
         e = paddle.subtract(loss_cvr, imp_out)
 
         min_v = paddle.full_like(ctr_out_one, 0.000001)
         ctr_out_one = paddle.maximum(ctr_out_one, min_v)
+        IPS = paddle.divide(paddle.cast(O, dtype="float32"), ctr_out_one)
 
-        loss_error_second = paddle.multiply(O, e)
-        loss_error_second = paddle.divide(loss_error_second, ctr_out_one)
+        IPS = paddle.clip(IPS, min=-15, max=15)  #online trick 
+        IPS.stop_gradient = True
 
-        loss_error = loss_error_first + loss_error_second
+        loss_error_second = paddle.multiply(e, IPS)
+
+        loss_error = imp_out + loss_error_second
 
         #dr imp part
         loss_imp = paddle.square(e)
-        loss_imp = paddle.multiply(loss_imp, O)
-        loss_imp = paddle.divide(loss_imp, ctr_out_one)
+        loss_imp = paddle.multiply(loss_imp, IPS)
+
+        loss_dr = loss_error + loss_imp
 
-        return loss_error + loss_imp
+        return paddle.mean(loss_dr)
 
     def net(self, inputs, is_infer=False):
 
@@ -138,7 +142,7 @@ def net(self, inputs, is_infer=False):
             input=cvr_out_one, label=paddle.cast(
                 ctcvr_buy, dtype="float32"))
         if self.counterfact_mode == "DR":
-            loss_cvr = self.counterfact_dr(loss_cvr, ctr_num, O, ctr_out_one,
+            loss_cvr = self.counterfact_dr(loss_cvr, O, ctr_out_one,
                                            out_list[6])
         else:
             loss_cvr = self.counterfact_ipw(loss_cvr, ctr_num, O, ctr_out_one)