|
77 | 77 | 'lod_reset',
|
78 | 78 | 'lrn',
|
79 | 79 | 'pad',
|
| 80 | + 'label_smooth', |
80 | 81 | ]
|
81 | 82 |
|
82 | 83 |
|
@@ -3678,3 +3679,68 @@ def pad(x, paddings, pad_value=0., name=None):
|
3678 | 3679 | attrs={'paddings': paddings,
|
3679 | 3680 | 'pad_value': float(pad_value)})
|
3680 | 3681 | return out
|
| 3682 | + |
| 3683 | + |
def label_smooth(label,
                 prior_dist=None,
                 epsilon=0.1,
                 dtype="float32",
                 name=None):
    # NOTE: raw docstring (r""") is required — the LaTeX markup below uses
    # backslash sequences (\mu, \epsilon) that are invalid string escapes
    # in a normal string literal.
    r"""
    Label smoothing is a mechanism to regularize the classifier layer and is
    called label-smoothing regularization (LSR).

    Label smoothing is proposed to encourage the model to be less confident,
    since optimizing the log-likelihood of the correct label directly may
    cause overfitting and reduce the ability of the model to adapt. Label
    smoothing replaces the ground-truth label :math:`y` with the weighted sum
    of itself and some fixed distribution :math:`\mu`. For class :math:`k`,
    i.e.

    .. math::

        \tilde{y_k} = (1 - \epsilon) * y_k + \epsilon * \mu_k,

    where :math:`1 - \epsilon` and :math:`\epsilon` are the weights
    respectively, and :math:`\tilde{y}_k` is the smoothed label. Usually
    uniform distribution is used for :math:`\mu`.

    See more details about label smoothing in https://arxiv.org/abs/1512.00567.

    Args:
        label(Variable): The input variable containing the label data. The
                          label data should use one-hot representation.
        prior_dist(Variable): The prior distribution to be used to smooth
                              labels. If not provided, a uniform distribution
                              is used. The shape of :attr:`prior_dist` should
                              be :math:`(1, class\_num)`.
        epsilon(float): The weight used to mix up the original ground-truth
                        distribution and the fixed distribution. Must lie in
                        :math:`[0, 1]`.
        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
                                                  float64, int etc.
        name(str|None): A name for this layer(optional). If set None, the layer
                        will be named automatically.

    Returns:
        Variable: The tensor variable containing the smoothed labels.

    Raises:
        ValueError: If :attr:`epsilon` is outside :math:`[0, 1]`.

    Examples:
        .. code-block:: python

            label = layers.data(name="label", shape=[1], dtype="float32")
            one_hot_label = layers.one_hot(input=label, depth=10)
            smooth_label = layers.label_smooth(
                label=one_hot_label, epsilon=0.1, dtype="float32")
    """
    if epsilon > 1. or epsilon < 0.:
        raise ValueError("The value of epsilon must be between 0 and 1.")
    helper = LayerHelper("label_smooth", **locals())
    # Labels are targets, not trainable inputs: block gradient flow into them.
    label.stop_gradient = True
    smooth_label = helper.create_tmp_variable(dtype)
    # Build the op inputs with an explicit `is not None` test rather than
    # relying on the truthiness of a framework Variable, which is ambiguous.
    inputs = {"X": label}
    if prior_dist is not None:
        inputs["PriorDist"] = prior_dist
    helper.append_op(
        type="label_smooth",
        inputs=inputs,
        outputs={"Out": smooth_label},
        attrs={"epsilon": float(epsilon)})
    return smooth_label
0 commit comments