PaddlePaddle
diff --git a/‎README_CN.md‎
Lines changed: 4 additions & 2 deletions b/‎README_CN.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎README_EN.md‎
Lines changed: 1 addition & 0 deletions b/‎README_EN.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎contributor.md‎
Lines changed: 3 additions & 0 deletions b/‎contributor.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎datasets/criteo_dcn_v2/download.sh‎
Lines changed: 16 additions & 0 deletions b/‎datasets/criteo_dcn_v2/download.sh‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎datasets/criteo_dcn_v2/get_slot_data.py‎
Lines changed: 106 additions & 0 deletions b/‎datasets/criteo_dcn_v2/get_slot_data.py‎
Lines changed: 106 additions & 0 deletions
diff --git a/‎datasets/criteo_dcn_v2/run.sh‎
Lines changed: 24 additions & 0 deletions b/‎datasets/criteo_dcn_v2/run.sh‎
Lines changed: 24 additions & 0 deletions
@@ -128,6 +128,7 @@ python -u tools/static_trainer.py -m models/rank/dnn/config.yaml #  静态图训
   |   召回   |                     [NCF](models/recall/ncf/)([文档](https://paddlerec.readthedocs.io/en/latest/models/recall/ncf.html))                     |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3240152)  |       ✓     |     ✓     | >=2.1.0 | [WWW 2017][Neural Collaborative Filtering](https://arxiv.org/pdf/1708.05031.pdf)                                                                                                                            |
   |   召回   |                     [TiSAS](models/recall/tisas/)            |   -   |    ✓    |     ✓     | >=2.1.0 | [WSDM 2020][Time Interval Aware Self-Attention for Sequential Recommendation](https://cseweb.ucsd.edu/~jmcauley/pdfs/wsdm20b.pdf)                                                                                               |
   |   召回   |                     [ENSFM](models/recall/ensfm/)                     |  -  |     ✓     |     ✓     | >=2.1.0 | [IW3C2 2020][Efficient Non-Sampling Factorization Machines for Optimal Context-Aware Recommendation](http://www.thuir.cn/group/~mzhang/publications/TheWebConf2020-Chenchong.pdf)                                                               |
+  |   召回   |                     [MHCN](models/recall/mhcn/)                     |  -  |     ✓     |     ✓     | >=2.1.0 | [WWW 2021][Self-Supervised Multi-Channel Hypergraph Convolutional Network for Social Recommendation](https://arxiv.org/pdf/2101.06448v3.pdf)                                                               |
   |   召回   |                     [GNN](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/recall/gnn/)                     |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [AAAI 2019][Session-based Recommendation with Graph Neural Networks](https://arxiv.org/abs/1811.00855)                                                                                                      |
   |   召回   |                     [RALM](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/recall/look-alike_recall/)                     |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [KDD 2019][Real-time Attention Based Look-alike Model for Recommender System](https://arxiv.org/pdf/1906.05022.pdf)                                                                                                      |
   |   排序   |      [Logistic Regression](models/rank/logistic_regression/)([文档](https://paddlerec.readthedocs.io/en/latest/models/rank/logistic_regression.html))      |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3240481)  |       ✓     |     x     | >=2.1.0 | /                                                                                                                                                                                                           |
@@ -157,9 +158,10 @@ python -u tools/static_trainer.py -m models/rank/dnn/config.yaml #  静态图训
   |   排序   |                [Wide&Deep](models/rank/wide_deep/)([文档](https://paddlerec.readthedocs.io/en/latest/models/rank/wide_deep.html))                |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238421)  |       ✓     |     x     | >=2.1.0 | [DLRS 2016][Wide & Deep Learning for Recommender Systems](https://dl.acm.org/doi/pdf/10.1145/2988450.2988454)                                                                                               |
   |   排序   |                    [FGCNN](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/fgcnn/)                    |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [WWW 2019][Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1904.04447.pdf)                                                                      |
   |   排序   |                  [Fibinet](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/fibinet/)                  |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [RecSys19][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction]( https://arxiv.org/pdf/1905.09433.pdf)                                                 |
-  |   排序   |                     [Flen](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/flen/)                     |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [2019][FLEN: Leveraging Field for Scalable CTR Prediction]( https://arxiv.org/pdf/1911.04690.pdf)                                                                                                           |
+  |   排序   |                     [FLEN](models/rank/flen/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [2019][FLEN: Leveraging Field for Scalable CTR Prediction]( https://arxiv.org/pdf/1911.04690.pdf)                                                                                                           |
   |   排序   |                     [DeepRec](models/rank/deeprec/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [2017][Training Deep AutoEncoders for Collaborative Filtering](https://arxiv.org/pdf/1708.01715v3.pdf)                                                                                                          |
-  |   排序   |                     [AutoFIS](models/rank/autofis/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [KDD 2020][AutoFIS: Automatic Feature Interaction Selection in Factorization Models for Click-Through Rate Prediction](https://arxiv.org/pdf/2003.11235v3.pdf)                                                                                                          |                    |  -  |       ✓     |     ✓     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [2019][FLEN: Leveraging Field for Scalable CTR Prediction]( https://arxiv.org/pdf/1911.04690.pdf)                                                                                                           |
+  |   排序   |                     [AutoFIS](models/rank/autofis/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [KDD 2020][AutoFIS: Automatic Feature Interaction Selection in Factorization Models for Click-Through Rate Prediction](https://arxiv.org/pdf/2003.11235v3.pdf) |
+  |   排序   |                     [DCN_V2](models/rank/dcn_v2/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [WWW 2021][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/pdf/2008.13535v2.pdf) |
   |  多任务  |                  [PLE](models/multitask/ple/)([文档](https://paddlerec.readthedocs.io/en/latest/models/multitask/ple.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238938)  |       ✓     |     ✓     |  >=2.1.0 | [RecSys 2020][Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations](https://dl.acm.org/doi/abs/10.1145/3383313.3412236)                                                              |
   |  多任务  |                  [ESMM](models/multitask/esmm/)([文档](https://paddlerec.readthedocs.io/en/latest/models/multitask/esmm.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238583)  |       ✓     |     ✓     | >=2.1.0 | [SIGIR 2018][Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate](https://arxiv.org/abs/1804.07931)                                                              |
   |  多任务  |                  [MMOE](models/multitask/mmoe/)([文档](https://paddlerec.readthedocs.io/en/latest/models/multitask/mmoe.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238934)  |       ✓     |     ✓     | >=2.1.0 | [KDD 2018][Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts](https://dl.acm.org/doi/abs/10.1145/3219819.3220007)                                                       |
 
@@ -150,6 +150,7 @@ python -u tools/static_trainer.py -m models/rank/dnn/config.yaml #  Training wit
   |         Rank          |                     [Flen](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/flen/)                     |  -  |         ✓         |     ✓     |  [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [2019][FLEN: Leveraging Field for Scalable CTR Prediction]( https://arxiv.org/pdf/1911.04690.pdf)                                                                                                           |
   |   Rank   |                     [DeepRec](models/rank/deeprec/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [2017][Training Deep AutoEncoders for Collaborative Filtering](https://arxiv.org/pdf/1708.01715v3.pdf)                                                                                                          |
   |   Rank   |                     [AutoFIS](models/rank/autofis/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [KDD 2020][AutoFIS: Automatic Feature Interaction Selection in Factorization Models for Click-Through Rate Prediction](https://arxiv.org/pdf/2003.11235v3.pdf)                                                                                                          |
+  |   Rank   |                     [DCN_V2](models/rank/dcn_v2/)                     |  -  |       ✓     |     ✓     | >=2.1.0 | [WWW 2021][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/pdf/2008.13535v2.pdf) |
   |      Multi-Task       |                  [PLE](models/multitask/ple/)<br>([doc](https://paddlerec.readthedocs.io/en/latest/models/multitask/ple.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238938)  |     ✓     |     ✓     |  >=2.1.0 | [RecSys 2020][Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations](https://dl.acm.org/doi/abs/10.1145/3383313.3412236)                                                              |
   |      Multi-Task       |                  [ESMM](models/multitask/esmm/)<br>([doc](https://paddlerec.readthedocs.io/en/latest/models/multitask/esmm.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238583)  |         ✓         |     ✓     |      >=2.1.0     | [SIGIR 2018][Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate](https://arxiv.org/abs/1804.07931)                                                              |
   |      Multi-Task       |                  [MMOE](models/multitask/mmoe/)<br>([doc](https://paddlerec.readthedocs.io/en/latest/models/multitask/mmoe.html))                   |  [Python CPU/GPU](https://aistudio.baidu.com/aistudio/projectdetail/3238934)  |         ✓         |     ✓     |      >=2.1.0     | [KDD 2018][Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts](https://dl.acm.org/doi/abs/10.1145/3219819.3220007)                                                       |
 
@@ -17,5 +17,8 @@
   |                       [AutoFIS](models/rank/autofis/)                       |  [renmada](https://github.com/renmada)  |    https://github.com/PaddlePaddle/PaddleRec/pull/660    | 论文复现赛第五期 |
   |                       [Dselect_K](models/multitask/dselect_k/)                       |  [Andy1314Chen](https://github.com/Andy1314Chen)  |    https://github.com/PaddlePaddle/PaddleRec/pull/671    | 论文复现赛第五期 |
   |                     [MIND](models/recall/mind/)                     |  [duyiqi17 ](https://github.com/duyiqi17)  |    https://github.com/PaddlePaddle/PaddleRec/pull/398   | 其他 |
+  |                     [FLEN](models/rank/flen/)                     |  [LinJayan](https://github.com/LinJayan)  |    https://github.com/PaddlePaddle/PaddleRec/pull/685   | 论文复现赛第五期 |
+  |                     [MHCN](models/recall/mhcn/)                     |  [Andy1314Chen](https://github.com/Andy1314Chen)  |    https://github.com/PaddlePaddle/PaddleRec/pull/679   | 论文复现赛第五期 |
+  |                     [DCN_V2](models/rank/dcn_v2/)                     |  [LinJayan](https://github.com/LinJayan)  |    https://github.com/PaddlePaddle/PaddleRec/pull/677   | 论文复现赛第五期 |
 
 </div> 
@@ -0,0 +1,16 @@
+wget --no-check-certificate https://paddlerec.bj.bcebos.com/deepfm%2Ffeat_dict_10.pkl2
+
+wget --no-check-certificate https://fleet.bj.bcebos.com/ctr_data.tar.gz
+
+tar -zxvf ctr_data.tar.gz
+mv ./raw_data ./train_data_full
+mkdir train_data && cd train_data
+cp ../train_data_full/part-0 ../train_data_full/part-1 ./ && cd ..
+mv ./test_data ./test_data_full
+mkdir test_data && cd test_data
+cp ../test_data_full/part-220 ./  && cd ..
+echo "Complete data download."
+echo "Full Train data stored in ./train_data_full "
+echo "Full Test data stored in ./test_data_full "
+echo "Rapid Verification train data stored in ./train_data "
+echo "Rapid Verification test data stored in ./test_data "
@@ -0,0 +1,106 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.	
+#	
+# Licensed under the Apache License, Version 2.0 (the "License");	
+# you may not use this file except in compliance with the License.	
+# You may obtain a copy of the License at	
+#	
+#     http://www.apache.org/licenses/LICENSE-2.0	
+#	
+# Unless required by applicable law or agreed to in writing, software	
+# distributed under the License is distributed on an "AS IS" BASIS,	
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.	
+# See the License for the specific language governing permissions and	
+# limitations under the License.	
+
+import os
+import numpy as np
+import paddle.fluid.incubate.data_generator as dg
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+import paddle.fluid.incubate.data_generator as dg
+
+
+class Reader(dg.MultiSlotDataGenerator):
+    def __init__(self, config):
+        dg.MultiSlotDataGenerator.__init__(self)
+
+    def init(self):
+        # DCN_v2 use log normalize the 13 continuous features
+        # log（x+4）for dense-feature-2, log(x+1) for others
+
+        # self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        # self.cont_max_ = [
+        #     5775, 257675, 65535, 969, 23159456, 431037, 56311, 6047, 29019, 46,
+        #     231, 4008, 7393
+        # ]
+        # self.cont_diff_ = [
+        #     self.cont_max_[i] - self.cont_min_[i]
+        #     for i in range(len(self.cont_min_))
+        # ]
+
+        self.continuous_range_ = range(1, 14)
+        self.categorical_range_ = range(14, 40)
+        # load preprocessed feature dict	
+        self.feat_dict_name = "deepfm%2Ffeat_dict_10.pkl2"  # 
+        self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb'))
+
+    def _process_line(self, line):
+        features = line.rstrip('\n').split('\t')
+        feat_idx = []
+        feat_value = []
+        # log normalize
+        for idx in self.continuous_range_:
+            if features[idx] == '':
+                # feat_idx.append(0)
+                feat_value.append(0.0)
+            else:
+                # feat_idx.append(self.feat_dict_[idx])
+                if idx == 2:  # log(x+4)
+                    feat_value.append(np.log(float(features[idx]) + 4))
+                else:  # log(x+1)
+                    feat_value.append(np.log(float(features[idx]) + 1))
+
+                # feat_idx.append(self.feat_dict_[idx])
+                # feat_value.append(
+                #     (float(features[idx]) - self.cont_min_[idx - 1]) /
+                #     self.cont_diff_[idx - 1])
+
+        for idx in self.categorical_range_:
+            if features[idx] == '' or features[idx] not in self.feat_dict_:
+                feat_idx.append(0)
+                # feat_value.append(0.0)
+            else:
+                feat_idx.append(self.feat_dict_[features[idx]])
+                # feat_value.append(1.0)
+        label = [int(features[0])]
+        return label, feat_value, feat_idx
+
+    def generate_sample(self, line):
+        """	
+        Read the data line by line and process it as a dictionary	
+        """
+
+        def data_iter():
+            label, feat_value, feat_idx = self._process_line(line)
+            s = ""
+            for i in [('click', label), ('dense_feature', feat_value),
+                      ('feat_idx', feat_idx)]:
+                k = i[0]
+                v = i[1]
+                for n, j in enumerate(v):
+                    if k == "feat_idx":
+                        s += " " + str(n + 1) + ":" + str(j)
+                    else:
+                        s += " " + k + ":" + str(j)
+            print(s.strip())  # add print for data preprocessing	
+            yield None
+
+        return data_iter
+
+
+# Script entry point, executed whenever this file is run.
+# The path argument is accepted by Reader.__init__ but not read there.
+reader = Reader("../config.yaml")
+# Loads the feature dictionary from the current working directory.
+reader.init()
+# presumably feeds each stdin line through generate_sample (provided by the
+# base class) -- TODO confirm against the paddle data_generator docs.
+reader.run_from_stdin()
@@ -0,0 +1,24 @@
+sh download.sh
+mkdir slot_train_data_full
+for i in `ls ./train_data_full`
+do
+    cat train_data_full/$i | python get_slot_data.py > slot_train_data_full/$i
+done
+
+mkdir slot_test_data_full
+for i in `ls ./test_data_full`
+do
+    cat test_data_full/$i | python get_slot_data.py > slot_test_data_full/$i
+done
+
+mkdir slot_train_data
+for i in `ls ./train_data`
+do
+    cat train_data/$i | python get_slot_data.py > slot_train_data/$i
+done
+
+mkdir slot_test_data
+for i in `ls ./test_data`
+do
+    cat test_data/$i | python get_slot_data.py > slot_test_data/$i
+done