|
| 1 | +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +
|
| 3 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +you may not use this file except in compliance with the License. |
| 5 | +You may obtain a copy of the License at |
| 6 | +
|
| 7 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +
|
| 9 | +Unless required by applicable law or agreed to in writing, software |
| 10 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +See the License for the specific language governing permissions and |
| 13 | +limitations under the License. */ |
| 14 | + |
| 15 | +#include "paddle/operators/ftrl_op.h" |
| 16 | + |
| 17 | +namespace paddle { |
| 18 | +namespace operators { |
| 19 | + |
| 20 | +class FTRLOp : public framework::OperatorWithKernel { |
| 21 | + public: |
| 22 | + using framework::OperatorWithKernel::OperatorWithKernel; |
| 23 | + |
| 24 | + protected: |
| 25 | + void InferShape(framework::InferShapeContext *ctx) const override { |
| 26 | + PADDLE_ENFORCE(ctx->HasInput("Param"), |
| 27 | + "Input(Param) of FTRL should not be null."); |
| 28 | + PADDLE_ENFORCE(ctx->HasInput("SquaredAccumulator"), |
| 29 | + "Input(SquaredAccumulator) of FTRL should not be null."); |
| 30 | + PADDLE_ENFORCE(ctx->HasInput("LinearAccumulator"), |
| 31 | + "Input(LinearAccumulator) of FTRL should not be null."); |
| 32 | + PADDLE_ENFORCE(ctx->HasInput("Grad"), |
| 33 | + "Input(Grad) of FTRL should not be null."); |
| 34 | + PADDLE_ENFORCE(ctx->HasInput("LearningRate"), |
| 35 | + "Input(LearningRate) of FTRL should not be null."); |
| 36 | + |
| 37 | + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), |
| 38 | + "Output(ParamOut) of FTRL should not be null."); |
| 39 | + PADDLE_ENFORCE(ctx->HasOutput("SquaredAccumOut"), |
| 40 | + "Output(SquaredAccumOut) of FTRL should not be null."); |
| 41 | + PADDLE_ENFORCE(ctx->HasOutput("LinearAccumOut"), |
| 42 | + "Output(LinearAccumOut) of FTRL should not be null."); |
| 43 | + |
| 44 | + auto param_dim = ctx->GetInputDim("Param"); |
| 45 | + PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), |
| 46 | + "Two input of FTRL Op's dimension must be same."); |
| 47 | + |
| 48 | + auto lr_dim = ctx->GetInputDim("LearningRate"); |
| 49 | + PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1, |
| 50 | + "Learning Rate should be a scalar."); |
| 51 | + |
| 52 | + ctx->SetOutputDim("ParamOut", param_dim); |
| 53 | + ctx->SetOutputDim("SquaredAccumOut", param_dim); |
| 54 | + ctx->SetOutputDim("LinearAccumOut", param_dim); |
| 55 | + } |
| 56 | +}; |
| 57 | + |
| 58 | +class FTRLOpMaker : public framework::OpProtoAndCheckerMaker { |
| 59 | + public: |
| 60 | + FTRLOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) |
| 61 | + : OpProtoAndCheckerMaker(proto, op_checker) { |
| 62 | + AddInput("Param", |
| 63 | + "(Tensor, default Tensor<float>) " |
| 64 | + "Input parameter value that has to be updated."); |
| 65 | + AddInput("SquaredAccumulator", |
| 66 | + "(Tensor, default Tensor<float>) " |
| 67 | + "Accumulator that accumulates squared gradients."); |
| 68 | + AddInput("LinearAccumulator", |
| 69 | + "(Tensor, default Tensor<float>) " |
| 70 | + "Accumulator that accumulates linear gradients."); |
| 71 | + AddInput("Grad", |
| 72 | + "(Tensor, default Tensor<float>) " |
| 73 | + "Input gradient of the parameter."); |
| 74 | + AddInput("LearningRate", |
| 75 | + "(Tensor, default Tensor<float>) " |
| 76 | + "The learning rate should be a tensor of size 1."); |
| 77 | + |
| 78 | + AddOutput("ParamOut", "(Tensor) Output updated parameter value."); |
| 79 | + AddOutput("SquaredAccumOut", |
| 80 | + "(Tensor) Output accumulated squared" |
| 81 | + " gradients."); |
| 82 | + AddOutput("LinearAccumOut", |
| 83 | + "(Tensor) Output accumulated linear" |
| 84 | + " gradients."); |
| 85 | + |
| 86 | + AddAttr<float>("l1", |
| 87 | + "(float, default 0.0) " |
| 88 | + "L1 regularization strength.") |
| 89 | + .SetDefault(0.0f); |
| 90 | + AddAttr<float>("l2", |
| 91 | + "(float, default 0.0) " |
| 92 | + "L2 regularization strength.") |
| 93 | + .SetDefault(0.0f); |
| 94 | + AddAttr<float>("lr_power", |
| 95 | + "(float, default -0.5f) " |
| 96 | + "Learning Rate Power.") |
| 97 | + .SetDefault(-0.5f); |
| 98 | + AddComment(R"DOC( |
| 99 | +FTRL (Follow The Regularized Leader) Operator. |
| 100 | +
|
| 101 | +Optimizer that implements the FTRL algorithm: |
| 102 | +
|
| 103 | +$$ |
| 104 | +new\_accum = squared\_accum + grad^2 \\ |
| 105 | +if (lr\_power == -0.5) { |
| 106 | + linear\_accum += grad - (\surd(new\_accum) - \surd(squared\_accum)) / |
| 107 | + (learning\_rate * param) \\ |
| 108 | +} else { |
| 109 | + linear\_accum += grad - |
| 110 | + (new\_accum^{-lr\_power} - accum^{-lr\_power}) / |
| 111 | + (learning\_rate * param) \\ |
| 112 | +} |
| 113 | +
|
| 114 | +x = (l1 * sign(linear\_accum) - linear\_accum) |
| 115 | +if (lr\_power == -0.5) { |
| 116 | + y = \frac{\surd(new\_accum)}{learning\_rate} + (2 * l2) \\ |
| 117 | + pre\_shrink = \frac{x}{y} \\ |
| 118 | + param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) \\ |
| 119 | +} else { |
| 120 | + y = \frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2) \\ |
| 121 | + pre\_shrink = \frac{x}{y} \\ |
| 122 | + param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) \\ |
| 123 | +} |
| 124 | +squared\_accum += grad^2; |
| 125 | +$$ |
| 126 | +
|
| 127 | +The paper that proposed Follow The Regularized Leader (FTRL): |
| 128 | +(https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf) |
| 129 | +
|
| 130 | +)DOC"); |
| 131 | + } |
| 132 | +}; |
| 133 | +} // namespace operators |
| 134 | +} // namespace paddle |
| 135 | + |
| 136 | +namespace ops = paddle::operators; |
| 137 | +REGISTER_OP_WITHOUT_GRADIENT(ftrl, ops::FTRLOp, ops::FTRLOpMaker); |
| 138 | +REGISTER_OP_CPU_KERNEL(ftrl, |
| 139 | + ops::FTRLOpKernel<paddle::platform::CPUPlace, float>); |
0 commit comments