Add EDCN model.

Heyi007 · 何意 · shenweichen · web-flow · commit c13aba6db708 · 2022-11-02T20:40:32.000+08:00
* feat: Add EDCN model.

Co-authored-by: 何意 <heyi.jack@bytedance.com>
Co-authored-by: 浅梦 <weichenswc@163.com>
diff --git a/README.md b/README.md
@@ -66,6 +66,7 @@ Introduction](https://zhuanlan.zhihu.com/p/53231955)) and [welcome to join us!](
 |   ESMM                    | [SIGIR 2018][Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate](https://arxiv.org/abs/1804.07931)                       |
 |   MMOE                    | [KDD 2018][Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts](https://dl.acm.org/doi/abs/10.1145/3219819.3220007)                   |
 |   PLE                    | [RecSys 2020][Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations](https://dl.acm.org/doi/10.1145/3383313.3412236)                   |
+|   EDCN                   | [KDD 2021][Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)                   |
 
 ## Citation
 
diff --git a/deepctr/layers/__init__.py b/deepctr/layers/__init__.py
@@ -1,11 +1,11 @@
 import tensorflow as tf
 
 from .activation import Dice
-from .core import DNN, LocalActivationUnit, PredictionLayer
+from .core import DNN, LocalActivationUnit, PredictionLayer, RegulationLayer
 from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix,
                           InnerProductLayer, InteractingLayer,
                           OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction,
-                          FieldWiseBiInteraction, FwFMLayer, FEFMLayer)
+                          FieldWiseBiInteraction, FwFMLayer, FEFMLayer, BridgeLayer)
 from .normalization import LayerNormalization
 from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM,
                        KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer,
@@ -28,6 +28,7 @@
                   'SequencePoolingLayer': SequencePoolingLayer,
                   'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer,
                   'CIN': CIN,
+                  'RegulationLayer': RegulationLayer,
                   'InteractingLayer': InteractingLayer,
                   'LayerNormalization': LayerNormalization,
                   'BiLSTM': BiLSTM,
@@ -48,5 +49,6 @@
                   'softmax': softmax,
                   'FEFMLayer': FEFMLayer,
                   'reduce_sum': reduce_sum,
-                  'PositionEncoding':PositionEncoding
+                  'PositionEncoding': PositionEncoding,
+                  'BridgeLayer': BridgeLayer
                   }
diff --git a/deepctr/layers/core.py b/deepctr/layers/core.py
@@ -10,9 +10,9 @@
 from tensorflow.python.keras import backend as K
 
 try:
-    from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal
+    from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, glorot_normal
 except ImportError:
-    from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal
+    from tensorflow.python.ops.init_ops import Zeros, Ones, glorot_normal_initializer as glorot_normal
 
 from tensorflow.python.keras.layers import Layer, Dropout
 
@@ -265,3 +265,59 @@ def get_config(self, ):
         config = {'task': self.task, 'use_bias': self.use_bias}
         base_config = super(PredictionLayer, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
+
+
+class RegulationLayer(Layer):
+    """Regulation module used in EDCN.
+
+    Scales every field of the input by a learned field-wise gate (a softmax
+    over one trainable score per field, multiplied by ``1 / tau`` beforehand)
+    and flattens the gated fields into a single vector per sample.
+
+      Input shape
+        - 3D tensor with shape: ``(batch_size, field_num, embedding_size)``.
+
+      Output shape
+        - 2D tensor with shape: ``(batch_size, field_num * embedding_size)``.
+
+      Arguments
+        - **tau** : Positive float, the temperature coefficient to control
+        distribution of field-wise gating unit. Only ``tau == 0`` is rejected.
+
+      References
+        - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
+    """
+
+    def __init__(self, tau=0.1, **kwargs):
+        if tau == 0:
+            raise ValueError("RegulationLayer tau can not be zero.")
+        # Kept as the reciprocal so that ``call`` multiplies instead of dividing.
+        self.tau = 1.0 / tau
+        super(RegulationLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.field_num = int(input_shape[1])
+        self.embedding_size = int(input_shape[2])
+        # One trainable gate score per field; the singleton axes broadcast over
+        # the batch and embedding dimensions in ``call``.
+        self.g = self.add_weight(
+            shape=(1, self.field_num, 1),
+            initializer=Ones(),
+            name=self.name + '_field_weight')
+
+        # Be sure to call this somewhere!
+        super(RegulationLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+
+        if K.ndim(inputs) != 3:
+            raise ValueError(
+                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
+
+        # Softmax along the field axis turns the scores into gates that sum to 1.
+        field_gating_score = tf.nn.softmax(self.g * self.tau, 1)
+        gated = inputs * field_gating_score
+        return tf.reshape(gated, [-1, self.field_num * self.embedding_size])
+
+    def compute_output_shape(self, input_shape):
+        return (None, self.field_num * self.embedding_size)
+
+    def get_config(self):
+        # ``self.tau`` holds 1 / tau. Serialize the constructor value instead,
+        # otherwise rebuilding the layer from its config would invert the
+        # temperature a second time.
+        config = {'tau': 1.0 / self.tau}
+        base_config = super(RegulationLayer, self).get_config()
+        base_config.update(config)
+        return base_config
diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py
@@ -3,7 +3,8 @@
 
 Authors:
     Weichen Shen,weichenswc@163.com,
-    Harshit Pande
+    Harshit Pande,
+    Yi He, heyi_jack@163.com
 
 """
 
@@ -26,6 +27,7 @@
 
 from .activation import activation_layer
 from .utils import concat_func, reduce_sum, softmax, reduce_mean
+from .core import DNN
 
 
 class AFMLayer(Layer):
@@ -1489,3 +1491,74 @@ def get_config(self):
             'regularizer': self.regularizer,
         })
         return config
+
+
+class BridgeLayer(Layer):
+    """Bridge module used in EDCN: fuses the outputs of two parallel sub-networks.
+
+      Input shape
+        - A list of two 2D tensors with the same shape: ``(batch_size, units)``.
+
+      Output shape
+        - 2D tensor with shape: ``(batch_size, units)``.
+
+      Arguments
+        - **bridge_type**: str, one of ``"pointwise_addition"``, ``"hadamard_product"``,
+        ``"concatenation"`` or ``"attention_pooling"``.
+
+        - **activation**: Activation function of the dense projection used by ``"concatenation"``.
+
+        - **l2_reg**: float between 0 and 1. L2 regularizer strength. Stored and serialized,
+        but currently not applied to any sub-layer.
+
+        - **seed**: A Python integer to use as random seed. Stored and serialized,
+        but currently not forwarded to any sub-layer.
+
+      References
+        - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
+
+    """
+
+    # Every fusion strategy ``call`` knows how to compute.
+    _BRIDGE_TYPES = ("pointwise_addition", "hadamard_product", "concatenation", "attention_pooling")
+
+    def __init__(self, bridge_type='attention_pooling', activation='relu', l2_reg=0, seed=1024, **kwargs):
+        # Fail at construction time instead of letting ``call`` return None.
+        if bridge_type not in self._BRIDGE_TYPES:
+            raise ValueError(
+                "Unknown bridge_type %r, expected one of: %s" % (bridge_type, ", ".join(self._BRIDGE_TYPES)))
+        self.bridge_type = bridge_type
+        self.activation = activation
+        self.l2_reg = l2_reg
+        self.seed = seed
+
+        super(BridgeLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        # ``call`` unpacks exactly two tensors, so any other length is an error.
+        if not isinstance(input_shape, list) or len(input_shape) != 2:
+            raise ValueError(
+                'A `BridgeLayer` layer should be called '
+                'on a list of exactly 2 inputs')
+
+        self.dnn_dim = int(input_shape[0][-1])
+
+        # Projection back to ``dnn_dim`` for the "concatenation" bridge.
+        self.dense = Dense(self.dnn_dim, self.activation)
+        # Per-input attention networks for the "attention_pooling" bridge.
+        self.dense_x = DNN([self.dnn_dim, self.dnn_dim], output_activation='softmax')
+        self.dense_h = DNN([self.dnn_dim, self.dnn_dim], output_activation='softmax')
+
+        super(BridgeLayer, self).build(input_shape)  # Be sure to call this somewhere!
+
+    def call(self, inputs, **kwargs):
+        x, h = inputs
+        if self.bridge_type == "pointwise_addition":
+            return x + h
+        elif self.bridge_type == "hadamard_product":
+            return x * h
+        elif self.bridge_type == "concatenation":
+            return self.dense(tf.concat(inputs, axis=-1))
+        elif self.bridge_type == "attention_pooling":
+            a_x = self.dense_x(x)
+            a_h = self.dense_h(h)
+            return a_x * x + a_h * h
+        # Only reachable if ``bridge_type`` was reassigned after construction.
+        raise ValueError("Unknown bridge_type %r" % (self.bridge_type,))
+
+    def compute_output_shape(self, input_shape):
+        return (None, self.dnn_dim)
+
+    def get_config(self):
+        base_config = super(BridgeLayer, self).get_config().copy()
+        config = {
+            'bridge_type': self.bridge_type,
+            'l2_reg': self.l2_reg,
+            'activation': self.activation,
+            'seed': self.seed
+        }
+        config.update(base_config)
+        return config
diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py
@@ -20,7 +20,8 @@
 from .sequence import DIN, DIEN, DSIN, BST
 from .wdl import WDL
 from .xdeepfm import xDeepFM
+from .edcn import EDCN
 
 __all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
            "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM",
-           "SharedBottom", "ESMM", "MMOE", "PLE"]
+           "SharedBottom", "ESMM", "MMOE", "PLE", 'EDCN']
diff --git a/deepctr/models/edcn.py b/deepctr/models/edcn.py
@@ -0,0 +1,107 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+    Yi He, heyi_jack@163.com
+
+Reference:
+    [1] Chen, B., Wang, Y., Liu, et al. Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models. CIKM, 2021, October (https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
+"""
+import tensorflow as tf
+from tensorflow.python.keras.layers import Dense, Lambda, Reshape, Concatenate
+from tensorflow.python.keras.models import Model
+
+from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
+from ..layers.core import PredictionLayer, DNN, RegulationLayer
+from ..layers.interaction import CrossNet, BridgeLayer
+from ..layers.utils import add_func, concat_func
+
+
+def EDCN(linear_feature_columns,
+         dnn_feature_columns,
+         bridge_type='attention_pooling',
+         tau=0.1,
+         use_dense_features=True,
+         cross_num=2,
+         cross_parameterization='vector',
+         l2_reg_linear=1e-5,
+         l2_reg_embedding=1e-5,
+         l2_reg_cross=1e-5,
+         l2_reg_dnn=0,
+         seed=10000,
+         dnn_dropout=0,
+         dnn_use_bn=False,
+         dnn_activation='relu',
+         task='binary'):
+    """Instantiates the Enhanced Deep&Cross Network architecture.
+
+    The embedded fields go through ``cross_num`` stages. In every stage the
+    stage input is gated by two separate regulation layers, one feeding a
+    single cross layer and one feeding a single DNN layer; a bridge layer then
+    fuses both outputs and the fused result becomes the next stage's input.
+    The last stage's cross, DNN and bridge outputs are concatenated, mapped to
+    one logit, added to the linear logit and passed through the prediction layer.
+
+    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
+    :param bridge_type: The type of bridge interaction, one of 'pointwise_addition', 'hadamard_product', 'concatenation', 'attention_pooling'
+    :param tau: Positive float, the temperature coefficient to control distribution of field-wise gating unit
+    :param use_dense_features: Whether to use dense features, if True, dense feature will be projected to sparse embedding space
+    :param cross_num: positive integer, cross layer number
+    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
+    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
+    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
+    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
+    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
+    :param seed: integer, to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not DNN
+    :param dnn_activation: Activation function to use in DNN
+    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
+    :return: A Keras model instance.
+    :raises ValueError: if ``cross_num`` is smaller than 1 or no embedding field is available.
+
+    """
+    # A non-positive value would leave the stage outputs below undefined.
+    if cross_num < 1:
+        raise ValueError("Cross layer num must > 0")
+
+    print('EDCN brige type: ', bridge_type)
+
+    features = build_input_features(dnn_feature_columns)
+    inputs_list = list(features.values())
+
+    linear_logit = get_linear_logit(features,
+                                    linear_feature_columns,
+                                    seed=seed,
+                                    prefix='linear',
+                                    l2_reg=l2_reg_linear)
+
+    sparse_embedding_list, dense_value_list = input_from_feature_columns(
+        features, dnn_feature_columns, l2_reg_embedding, seed)
+    if not sparse_embedding_list:
+        raise ValueError("EDCN needs at least one embedding field in dnn_feature_columns")
+
+    # project dense value to sparse embedding space, generate a new field feature
+    sparse_embedding_dim = int(sparse_embedding_list[0].shape[-1])
+    if use_dense_features and dense_value_list:  # nothing to project without dense inputs
+        dense_value_field = concat_func(dense_value_list)
+        dense_value_field = Dense(sparse_embedding_dim, dnn_activation)(dense_value_field)
+        dense_value_field = Lambda(lambda x: tf.expand_dims(x, axis=1))(dense_value_field)
+        sparse_embedding_list.append(dense_value_field)
+
+    field_size = len(sparse_embedding_list)
+    # Width of the flattened field embeddings; every stage keeps this width.
+    cross_dim = field_size * sparse_embedding_dim
+    cross_in = concat_func(sparse_embedding_list, axis=1)
+    deep_in = concat_func(sparse_embedding_list, axis=1)
+
+    for _ in range(cross_num):
+        # Each branch gets its own regulation layer over the shared stage input.
+        cross_in = RegulationLayer(tau)(cross_in)
+        deep_in = RegulationLayer(tau)(deep_in)
+        cross_out = CrossNet(1, parameterization=cross_parameterization,
+                             l2_reg=l2_reg_cross)(cross_in)
+        deep_out = DNN([cross_dim], dnn_activation, l2_reg_dnn,
+                       dnn_dropout, dnn_use_bn, seed=seed)(deep_in)
+
+        bridge_out = BridgeLayer(bridge_type)([cross_out, deep_out])
+        # Restore the (field, embedding) layout expected by the next regulation layers.
+        bridge_out_list = Reshape([field_size, sparse_embedding_dim])(bridge_out)
+
+        deep_in = bridge_out_list
+        cross_in = bridge_out_list
+
+    stack_out = Concatenate()([cross_out, deep_out, bridge_out])
+    final_logit = Dense(1, use_bias=False)(stack_out)
+
+    final_logit = add_func([final_logit, linear_logit])
+    output = PredictionLayer(task)(final_logit)
+
+    # Expose the prediction-layer output, not the raw logit, so ``task`` takes effect.
+    model = Model(inputs=inputs_list, outputs=output)
+
+    return model
diff --git a/tests/models/EDCN_test.py b/tests/models/EDCN_test.py
@@ -0,0 +1,31 @@
+import pytest
+import tensorflow as tf
+
+from deepctr.models import EDCN
+from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
+    TEST_Estimator
+
+
+@pytest.mark.parametrize(
+    'bridge_type, tau, use_dense_features, cross_num, cross_parameterization, sparse_feature_num',
+    [
+        ('pointwise_addition', 1, True, 2, 'vector', 3),
+        ('hadamard_product', 1, False, 2, 'vector', 4),
+        ('concatenation', 1, True, 3, 'vector', 5),
+        ('attention_pooling', 1, True, 2, 'matrix', 6),
+    ]
+)
+def test_EDCN(bridge_type, tau, use_dense_features, cross_num, cross_parameterization, sparse_feature_num):
+    """Build EDCN for each parametrized configuration and hand it to ``check_model``."""
+    # The same count is used for sparse and dense features.
+    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
+                                          dense_feature_num=sparse_feature_num)
+
+    # One column list serves as both the linear and the deep input.
+    model = EDCN(linear_feature_columns=feature_columns,
+                 dnn_feature_columns=feature_columns,
+                 bridge_type=bridge_type,
+                 tau=tau,
+                 use_dense_features=use_dense_features,
+                 cross_num=cross_num,
+                 cross_parameterization=cross_parameterization)
+    check_model(model, "EDCN", x, y)
+
+
+# Running this file directly does nothing; the guard body is a bare ``pass``.
+if __name__ == "__main__":
+    pass