Commit a07132a
Fwfm (#237)
* README updated
* FwFM interaction layer added
* FwFM with deep support added
* tests for FwFM added
* example to run fwfm
1 parent 9e6be3f commit a07132a

File tree: 8 files changed (+251, -9 lines)

README.md

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Star
 | AutoInt | [arxiv 2018][AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) |
 | Deep Interest Network | [KDD 2018][Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1706.06978.pdf) |
 | Deep Interest Evolution Network | [AAAI 2019][Deep Interest Evolution Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1809.03672.pdf) |
+| FwFM | [WWW 2018][Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising](https://arxiv.org/pdf/1806.03514.pdf) |
 | ONN | [arxiv 2019][Operation-aware Neural Networks for User Response Prediction](https://arxiv.org/pdf/1904.12579.pdf) |
 | FGCNN | [WWW 2019][Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1904.04447) |
 | Deep Session Interest Network | [IJCAI 2019][Deep Session Interest Network for Click-Through Rate Prediction](https://arxiv.org/abs/1905.06482) |
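For context on the new entry: FwFM extends FM by weighting every pairwise interaction with a learned scalar per field pair. Paraphrasing the cited WWW 2018 paper (notation from the paper, not from this diff), with m features, embeddings v_i, and field map F:

```latex
\Phi_{\mathrm{FwFM}}(w, x) = w_0 + \sum_{i=1}^{m} x_i w_i
    + \sum_{i=1}^{m} \sum_{j=i+1}^{m} x_i x_j \langle v_i, v_j \rangle \, r_{F(i), F(j)}
```

The new `FwFM` layer below implements the double sum over field pairs; the constant and linear terms come from `get_linear_logit` in `deepfwfm.py`.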

deepctr/layers/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -5,7 +5,7 @@
 from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet,
                           InnerProductLayer, InteractingLayer,
                           OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction,
-                          FieldWiseBiInteraction)
+                          FieldWiseBiInteraction, FwFM)
 from .normalization import LayerNormalization
 from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM,
                        KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer,
@@ -41,5 +41,6 @@
     'BilinearInteraction': BilinearInteraction,
     'WeightedSequenceLayer': WeightedSequenceLayer,
     'Add': Add,
-    'FieldWiseBiInteraction': FieldWiseBiInteraction
+    'FieldWiseBiInteraction': FieldWiseBiInteraction,
+    'FwFM': FwFM
 }
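The dict being extended here is DeepCTR's custom-objects registry, which lets saved models containing these layers be reloaded. A minimal sketch, assuming the registry is exported as `custom_objects` (as in DeepCTR's docs) and that a trained `DeepFwFM.h5` file exists:

```python
from tensorflow.python.keras.models import load_model

from deepctr.layers import custom_objects  # the dict extended in this diff

# 'DeepFwFM.h5' is a hypothetical file saved earlier via model.save(...);
# passing custom_objects lets Keras deserialize the FwFM layer by name.
model = load_model('DeepFwFM.h5', custom_objects)
```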

deepctr/layers/interaction.py

Lines changed: 84 additions & 5 deletions
@@ -1,8 +1,9 @@
 # -*- coding:utf-8 -*-
 """

-Author:
-    Weichen Shen,[email protected]
+Authors:
+    Weichen Shen,[email protected],
+    Harshit Pande

 """

@@ -11,9 +12,10 @@
 import tensorflow as tf
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.initializers import (Zeros, glorot_normal,
-                                                  glorot_uniform)
+                                                  glorot_uniform, TruncatedNormal)
 from tensorflow.python.keras.layers import Layer
 from tensorflow.python.keras.regularizers import l2
+from tensorflow.python.keras.backend import batch_dot
 from tensorflow.python.layers import utils

 from .activation import activation_layer

@@ -1052,7 +1054,7 @@ class FieldWiseBiInteraction(Layer):

      Output shape
        - 2D tensor with shape: ``(batch_size,embedding_size)``.
-
+
      Arguments
        - **use_bias** : Boolean, if use bias.
        - **seed** : A Python integer to use as random seed.

@@ -1062,7 +1064,7 @@ class FieldWiseBiInteraction(Layer):

     """

-    def __init__(self,use_bias=True, seed=1024, **kwargs):
+    def __init__(self, use_bias=True, seed=1024, **kwargs):
         self.use_bias = use_bias
         self.seed = seed

@@ -1167,3 +1169,80 @@ def get_config(self, ):
         config = {'use_bias': self.use_bias, 'seed': self.seed}
         base_config = super(FieldWiseBiInteraction, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
+
+
+class FwFM(Layer):
+    """Field-weighted Factorization Machines
+
+      Input shape
+        - 3D tensor with shape: ``(batch_size, field_size, embedding_size)``.
+
+      Output shape
+        - 2D tensor with shape: ``(batch_size, 1)``.
+
+      Arguments
+        - **num_fields** : integer, number of fields.
+        - **regularizer** : L2 regularizer weight for the field strength parameters of FwFM.
+
+      References
+        - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising]
+          https://arxiv.org/pdf/1806.03514.pdf
+    """
+
+    def __init__(self, num_fields=4, regularizer=0.000001, **kwargs):
+        self.num_fields = num_fields
+        self.regularizer = regularizer
+        super(FwFM, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        if len(input_shape) != 3:
+            raise ValueError("Unexpected inputs dimensions %d, "
+                             "expect to be 3 dimensions" % (len(input_shape)))
+
+        if input_shape[1] != self.num_fields:
+            raise ValueError("Mismatch in number of fields {} and "
+                             "concatenated embeddings dims {}".format(self.num_fields, input_shape[1]))
+
+        self.field_strengths = self.add_weight(name='field_pair_strengths',
+                                               shape=(self.num_fields, self.num_fields),
+                                               initializer=TruncatedNormal(),
+                                               regularizer=l2(self.regularizer),
+                                               trainable=True)
+
+        super(FwFM, self).build(input_shape)  # Be sure to call this somewhere!
+
+    def call(self, inputs, **kwargs):
+        if K.ndim(inputs) != 3:
+            raise ValueError("Unexpected inputs dimensions %d, "
+                             "expect to be 3 dimensions" % (K.ndim(inputs)))
+
+        if inputs.shape[1] != self.num_fields:
+            raise ValueError("Mismatch in number of fields {} and "
+                             "concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1]))
+
+        pairwise_inner_prods = []
+        for fi, fj in itertools.combinations(range(self.num_fields), 2):
+            # get field strength for pair fi and fj
+            r_ij = self.field_strengths[fi, fj]
+
+            # get embeddings for the features of both the fields
+            feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1)
+            feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1)
+
+            f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1))
+            pairwise_inner_prods.append(f)
+
+        sum_ = tf.add_n(pairwise_inner_prods)
+        return sum_
+
+    def compute_output_shape(self, input_shape):
+        return (None, 1)
+
+    def get_config(self):
+        config = super(FwFM, self).get_config().copy()
+        config.update({
+            'num_fields': self.num_fields,
+            'regularizer': self.regularizer
+        })
+        return config
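To see what the new layer computes, here is a minimal standalone sketch of the same pairwise term in plain TensorFlow 2 (eager mode assumed; the random input and the stand-in strength matrix are illustrative, not trained weights):

```python
import itertools

import tensorflow as tf

# FwFM pairwise term: for each field pair (i, j) with i < j, take the inner
# product of the two field embeddings, scale it by the pair strength r_ij,
# and sum over all pairs -> one logit per sample, shape (batch_size, 1).
batch_size, num_fields, embedding_size = 2, 4, 3  # illustrative sizes
inputs = tf.random.normal((batch_size, num_fields, embedding_size))
field_strengths = tf.random.normal((num_fields, num_fields))  # stands in for the learned weights

pairwise = []
for fi, fj in itertools.combinations(range(num_fields), 2):
    r_ij = field_strengths[fi, fj]
    dot = tf.reduce_sum(inputs[:, fi, :] * inputs[:, fj, :], axis=1, keepdims=True)
    pairwise.append(r_ij * dot)

logit = tf.add_n(pairwise)
print(logit.shape)  # (2, 1)
```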

deepctr/models/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -17,6 +17,7 @@
 from .dsin import DSIN
 from .fibinet import FiBiNET
 from .flen import FLEN
+from .deepfwfm import DeepFwFM

-__all__ = ["AFM", "CCPM", "DCN", "MLR", "DeepFM",
-           "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN", "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN']
+__all__ = ["AFM", "CCPM", "DCN", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
+           "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "DeepFwFM"]

deepctr/models/deepfwfm.py

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+    Harshit Pande
+
+Reference:
+    [1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising
+    (https://arxiv.org/pdf/1806.03514.pdf)
+
+"""
+
+from itertools import chain
+
+import tensorflow as tf
+
+from ..inputs import input_from_feature_columns, get_linear_logit, build_input_features, combined_dnn_input, DEFAULT_GROUP_NAME
+from ..layers.core import PredictionLayer, DNN
+from ..layers.interaction import FwFM
+from ..layers.utils import concat_func, add_func
+
+
+def DeepFwFM(linear_feature_columns, dnn_feature_columns, fm_group=[DEFAULT_GROUP_NAME], dnn_hidden_units=(128, 128),
+             l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
+             init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
+    """Instantiates the DeepFwFM Network architecture.
+
+    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param fm_group: list, group_name of features that will be used to do feature interactions.
+    :param dnn_hidden_units: list of positive integers (or an empty list to disable the DNN), the layer number
+        and units in each layer of the DNN.
+    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
+    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
+    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters.
+    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
+    :param init_std: float, standard deviation used to initialize embedding vectors.
+    :param seed: integer, to use as random seed.
+    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
+    :param dnn_activation: activation function to use in the DNN.
+    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
+    :return: A Keras model instance.
+    """
+
+    features = build_input_features(linear_feature_columns + dnn_feature_columns)
+
+    inputs_list = list(features.values())
+
+    group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
+                                                                        l2_reg_embedding, init_std, seed,
+                                                                        support_group=True)
+
+    linear_logit = get_linear_logit(features, linear_feature_columns, init_std=init_std, seed=seed, prefix='linear',
+                                    l2_reg=l2_reg_linear)
+
+    fwfm_logit = add_func([FwFM(num_fields=len(v), regularizer=l2_reg_field_strength)
+                           (concat_func(v, axis=1)) for k, v in group_embedding_dict.items() if k in fm_group])
+
+    final_logit_components = [linear_logit, fwfm_logit]
+
+    if dnn_hidden_units:
+        dnn_input = combined_dnn_input(list(chain.from_iterable(
+            group_embedding_dict.values())), dense_value_list)
+        dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
+                         dnn_use_bn, seed)(dnn_input)
+        dnn_logit = tf.keras.layers.Dense(
+            1, use_bias=False, activation=None)(dnn_output)
+        final_logit_components.append(dnn_logit)
+
+    final_logit = add_func(final_logit_components)
+
+    output = PredictionLayer(task)(final_logit)
+    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
+    return model
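Like DeepFM, the model sums a linear logit, the FwFM pairwise logit (one `FwFM` layer per feature group in `fm_group`), and an optional DNN logit before `PredictionLayer(task)`. For a quicker check than the Criteo script below, a toy smoke test can be built from APIs that already appear in this commit (`SparseFeat`, `get_feature_names`); the feature names, vocabulary sizes, and random data here are purely illustrative:

```python
import numpy as np

from deepctr.inputs import SparseFeat, get_feature_names
from deepctr.models import DeepFwFM

# Two hypothetical sparse fields with small vocabularies.
feature_columns = [SparseFeat('C1', vocabulary_size=10, embedding_dim=4),
                   SparseFeat('C2', vocabulary_size=10, embedding_dim=4)]

model = DeepFwFM(feature_columns, feature_columns, dnn_hidden_units=(8,))
model.compile('adam', 'binary_crossentropy')

# Random integer ids and binary labels, just to exercise the graph end to end.
x = {name: np.random.randint(0, 10, size=32) for name in get_feature_names(feature_columns)}
y = np.random.randint(0, 2, size=32)
model.fit(x, y, batch_size=16, epochs=1, verbose=0)
```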

examples/run_fwfm.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+import pandas as pd
+from sklearn.metrics import log_loss, roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
+
+from deepctr.models import DeepFwFM
+from deepctr.inputs import SparseFeat, DenseFeat, get_feature_names
+
+if __name__ == "__main__":
+    data = pd.read_csv('./criteo_sample.txt')
+
+    sparse_features = ['C' + str(i) for i in range(1, 27)]
+    dense_features = ['I' + str(i) for i in range(1, 14)]
+
+    data[sparse_features] = data[sparse_features].fillna('-1', )
+    data[dense_features] = data[dense_features].fillna(0, )
+    target = ['label']
+
+    # 1. Label-encode sparse features, and apply a simple transformation to dense features
+    for feat in sparse_features:
+        lbe = LabelEncoder()
+        data[feat] = lbe.fit_transform(data[feat])
+    mms = MinMaxScaler(feature_range=(0, 1))
+    data[dense_features] = mms.fit_transform(data[dense_features])
+
+    # 2. Count #unique features for each sparse field, and record dense feature field names
+    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
+                              for feat in sparse_features] + [DenseFeat(feat, 1, )
+                                                              for feat in dense_features]
+
+    dnn_feature_columns = fixlen_feature_columns
+    linear_feature_columns = fixlen_feature_columns
+
+    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
+
+    # 3. Generate input data for the model
+    train, test = train_test_split(data, test_size=0.2)
+    train_model_input = {name: train[name] for name in feature_names}
+    test_model_input = {name: test[name] for name in feature_names}
+
+    # 4. Define the model, then train, predict and evaluate
+    model = DeepFwFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(100, 100), task='binary')
+    model.compile("adam", "binary_crossentropy",
+                  metrics=['binary_crossentropy'], )
+
+    history = model.fit(train_model_input, train[target].values,
+                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
+    pred_ans = model.predict(test_model_input, batch_size=256)
+    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
+    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))

tests/layers/interaction_test.py

Lines changed: 12 additions & 0 deletions
@@ -2,8 +2,12 @@

 try:
     from tensorflow.python.keras.utils import CustomObjectScope
+    from tensorflow.python.keras.regularizers import l2
+    from tensorflow.python.keras.initializers import TruncatedNormal
 except:
     from tensorflow.keras.utils import CustomObjectScope
+    from tensorflow.keras.regularizers import l2
+    from tensorflow.keras.initializers import TruncatedNormal
 from deepctr import layers

 from tests.utils import layer_test
@@ -13,6 +17,14 @@
 EMBEDDING_SIZE = 3
 SEQ_LENGTH = 10

+@pytest.mark.parametrize(
+    'reg_strength',
+    [0.000001]
+)
+def test_FwFM(reg_strength):
+    with CustomObjectScope({'FwFM': layers.FwFM}):
+        layer_test(layers.FwFM, kwargs={'num_fields': FIELD_SIZE, 'regularizer': reg_strength},
+                   input_shape=(BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE))

 @pytest.mark.parametrize(

tests/models/DeepFwFM_test.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+import pytest
+from deepctr.models import DeepFwFM
+from ..utils import check_model, get_test_data, SAMPLE_SIZE
+
+
+@pytest.mark.parametrize(
+    'hidden_size,sparse_feature_num',
+    [((2,), 1),
+     ((), 1),
+     ]
+)
+def test_DeepFwFM(hidden_size, sparse_feature_num):
+    model_name = "DeepFwFM"
+    sample_size = SAMPLE_SIZE
+    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
+                                          dense_feature_num=sparse_feature_num)
+    model = DeepFwFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
+
+    check_model(model, model_name, x, y)
+
+
+if __name__ == "__main__":
+    pass
