# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle

from net import xDeepFMLayer


class StaticModel:
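    """Static-graph xDeepFM model for PaddleRec.

    Reads hyperparameters from the config, declares the feed variables,
    builds the network together with its loss and AUC metric, and creates
    the optimizer.
    """
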
    def __init__(self, config):
        self.cost = None
        self.config = config
        self._init_hyper_parameters()

    def _init_hyper_parameters(self):
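        """Read the model hyperparameters from the `hyper_parameters`
        section of the yaml config."""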
        self.is_distributed = False
        self.distributed_embedding = False

        if self.config.get("hyper_parameters.distributed_embedding", 0) == 1:
            self.distributed_embedding = True

        self.sparse_feature_number = self.config.get(
            "hyper_parameters.sparse_feature_number")
        self.sparse_feature_dim = self.config.get(
            "hyper_parameters.sparse_feature_dim")
        self.sparse_inputs_slot = self.config.get(
            "hyper_parameters.sparse_inputs_slots")
        self.dense_input_dim = self.config.get(
            "hyper_parameters.dense_input_dim")
        self.learning_rate = self.config.get(
            "hyper_parameters.optimizer.learning_rate")
        self.layer_sizes_cin = self.config.get(
            "hyper_parameters.layer_sizes_cin")
        self.layer_sizes_dnn = self.config.get(
            "hyper_parameters.layer_sizes_dnn")

    def create_feeds(self, is_infer=False):
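        """Declare the feed variables: one dense float vector, an int64 id
        per sparse slot (named "C1" .. "C<sparse_inputs_slot - 1>"), and
        the binary label, returned ordered as [label, sparse..., dense]."""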
        dense_input = paddle.static.data(
            name="dense_input",
            shape=[None, self.dense_input_dim],
            dtype="float32")

        sparse_input_ids = [
            paddle.static.data(
                name="C" + str(i), shape=[None, 1], dtype="int64")
            for i in range(1, self.sparse_inputs_slot)
        ]

        label = paddle.static.data(
            name="label", shape=[None, 1], dtype="int64")

        self._sparse_data_var = [label] + sparse_input_ids
        self._dense_data_var = [dense_input]

        feeds_list = [label] + sparse_input_ids + [dense_input]
        return feeds_list

    def net(self, input, is_infer=False):
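        """Build the forward graph: split the feed list back into label,
        sparse, and dense inputs, run xDeepFMLayer, and attach the AUC
        metric (plus the log-loss cost when training)."""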
        self.sparse_inputs = input[1:self.sparse_inputs_slot]
        self.dense_input = input[-1]
        self.label_input = input[0]
        sparse_number = self.sparse_inputs_slot - 1
        assert sparse_number == len(self.sparse_inputs)

        xdeepfm_model = xDeepFMLayer(
            self.sparse_feature_number, self.sparse_feature_dim,
            self.dense_input_dim, sparse_number, self.layer_sizes_cin,
            self.layer_sizes_dnn)

        pred = xdeepfm_model(self.sparse_inputs, self.dense_input)

        # Stack [1 - p, p] so the AUC op receives a probability per class.
        predict_2d = paddle.concat(x=[1 - pred, pred], axis=1)

        auc, batch_auc_var, _ = paddle.static.auc(input=predict_2d,
                                                  label=self.label_input,
                                                  slide_steps=0)

        self.inference_target_var = auc
        if is_infer:
            fetch_dict = {'auc': auc}
            return fetch_dict

        cost = paddle.nn.functional.log_loss(
            input=pred, label=paddle.cast(
                self.label_input, dtype="float32"))
        avg_cost = paddle.mean(x=cost)
        self._cost = avg_cost
        fetch_dict = {'cost': avg_cost, 'auc': auc}
        return fetch_dict

    def create_optimizer(self, strategy=None):
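        """Create an Adam optimizer (lazy mode suits sparse gradients);
        wrap it in fleet's distributed optimizer when a strategy is given,
        then minimize the training cost."""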
        optimizer = paddle.optimizer.Adam(
            learning_rate=self.learning_rate, lazy_mode=True)
        if strategy is not None:
            import paddle.distributed.fleet as fleet
            optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(self._cost)

    def infer_net(self, input):
        return self.net(input, is_infer=True)