"""
Author:

Reference:
    [1] Tang H, Liu J, Zhao M, et al. Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations[C]//Fourteenth ACM Conference on Recommender Systems. 2020.
"""
import tensorflow as tf

from ...feature_column import build_input_features, input_from_feature_columns
from ...layers.core import PredictionLayer, DNN
from ...layers.utils import combined_dnn_input, reduce_sum


def CGC(dnn_feature_columns, num_tasks=None, task_types=None, task_names=None, num_experts_specific=8,
        num_experts_shared=4,
        expert_dnn_units=(128, 128), gate_dnn_units=None, tower_dnn_units_lists=((32,), (32,)),
        l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False):
    """Instantiates the Customized Gate Control block of the Progressive Layered Extraction architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param num_tasks: integer, number of tasks, equal to the number of outputs, must be greater than 1.
    :param task_types: list of str, indicating the loss of each task, ``"binary"`` for binary logloss, ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, indicating the prediction target of each task

    :param num_experts_specific: integer, number of task-specific experts.
    :param num_experts_shared: integer, number of task-shared experts.

    :param expert_dnn_units: list of positive integers, its length must be greater than 1, the layer number and units in each layer of the expert DNN
    :param gate_dnn_units: list of positive integers or None, the layer number and units in each layer of the gate DNN, default value is None. e.g. [8, 8].
    :param tower_dnn_units_lists: list of positive integer lists, its length must be equal to num_tasks, the layer number and units in each layer of each task-specific tower DNN

    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :return: a Keras model instance
    """

    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")
    if task_names is None or len(task_names) != num_tasks:
        raise ValueError("the length of task_names must be equal to num_tasks")

    for task_type in task_types:
        if task_type not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(task_type))

    if num_tasks != len(tower_dnn_units_lists):
        raise ValueError("the length of tower_dnn_units_lists must be equal to num_tasks")

    features = build_input_features(dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    expert_outputs = []
    # build task-specific expert layers
    for i in range(num_tasks):
        for j in range(num_experts_specific):
            expert_network = DNN(expert_dnn_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                                 name='task_' + task_names[i] + '_expert_specific_' + str(j))(dnn_input)
            expert_outputs.append(expert_network)

    # build task-shared expert layers
    for i in range(num_experts_shared):
        expert_network = DNN(expert_dnn_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                             name='expert_shared_' + str(i))(dnn_input)
        expert_outputs.append(expert_network)

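    # note on expert_outputs layout (stated here for the slicing below): the first
    # num_tasks * num_experts_specific entries are the task-specific experts grouped
    # by task, and the last num_experts_shared entries are the shared experts.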
    # build one Extraction Layer
    cgc_outs = []
    for i in range(num_tasks):
        # concat task-specific experts and task-shared experts
        cur_expert_num = num_experts_specific + num_experts_shared
        cur_experts = expert_outputs[i * num_experts_specific:(i + 1) * num_experts_specific] + \
                      expert_outputs[-num_experts_shared:]  # task-specific + task-shared
        expert_concat = tf.keras.layers.concatenate(cur_experts, axis=1, name='expert_concat_' + task_names[i])
        expert_concat = tf.keras.layers.Reshape([cur_expert_num, expert_dnn_units[-1]],
                                                name='expert_reshape_' + task_names[i])(expert_concat)

        # build gate layers
        if gate_dnn_units is not None:
            gate_network = DNN(gate_dnn_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                               name='gate_' + task_names[i])(dnn_input)
            gate_input = gate_network
        else:  # in the original paper, the gate is a single Dense layer with softmax.
            gate_input = dnn_input

        gate_out = tf.keras.layers.Dense(cur_expert_num, use_bias=False, activation='softmax',
                                         name='gate_softmax_' + task_names[i])(gate_input)
        gate_out = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out)

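        # shapes at this point: expert_concat is (batch, cur_expert_num, expert_dnn_units[-1])
        # and gate_out is (batch, cur_expert_num, 1), so the element-wise multiply below
        # broadcasts the softmax gate weights over each expert's output vector.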
        # weight the experts by the gate and sum over the expert axis
        gate_mul_expert = tf.keras.layers.Multiply(name='gate_mul_expert_' + task_names[i])([expert_concat, gate_out])
        gate_mul_expert = tf.keras.layers.Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))(gate_mul_expert)
        cgc_outs.append(gate_mul_expert)

    task_outs = []
    for task_type, task_name, tower_dnn, cgc_out in zip(task_types, task_names, tower_dnn_units_lists, cgc_outs):
        # build tower layer
        tower_output = DNN(tower_dnn, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                           name='tower_' + task_name)(cgc_out)
        logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(tower_output)
        output = PredictionLayer(task_type, name=task_name)(logit)
        task_outs.append(output)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=task_outs)
    return model
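

if __name__ == "__main__":
    # Minimal usage sketch, not part of the library code: it assumes this module
    # ships inside the deepctr package (as the relative imports above suggest), so
    # it must be run with ``python -m`` from the installed package; the feature
    # names, vocabulary sizes, and task setup below are hypothetical.
    from deepctr.feature_column import SparseFeat, DenseFeat

    feature_columns = [SparseFeat('user_id', vocabulary_size=1000, embedding_dim=8),
                       SparseFeat('item_id', vocabulary_size=5000, embedding_dim=8),
                       DenseFeat('price', 1)]
    # two tasks: a binary CTR head and a regression head for watch time
    model = CGC(feature_columns, num_tasks=2, task_types=['binary', 'regression'],
                task_names=['ctr', 'watch_time'], tower_dnn_units_lists=((32,), (32,)))
    model.compile(optimizer='adam', loss=['binary_crossentropy', 'mse'])
    model.summary()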