Skip to content

Commit 1722678

Browse files
Make nce support more distribution. (#13549)
* Fix truncated normal. * Fix. * Make nce support more distribution. * Fix API.spec. * Fix python API. * Fix. test=develop * Fix API.spec test=develop * Fix sampler. * Fix order of arguments in python API. test=develop
1 parent 2f27c04 commit 1722678

File tree

9 files changed

+272
-75
lines changed

9 files changed

+272
-75
lines changed

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
9797
paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
9898
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
9999
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
100-
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None))
100+
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0))
101101
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
102102
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
103103
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))

paddle/fluid/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ op_library(flatten_op DEPS reshape_op)
308308
op_library(sequence_pad_op DEPS sequence_padding)
309309
op_library(unstack_op DEPS stack_op)
310310
op_library(fake_quantize_op DEPS memory)
311+
op_library(nce_op DEPS sampler)
311312
if (NOT WIN32)
312313
op_library(crf_decoding_op DEPS jit_kernel)
313314
op_library(fusion_lstm_op DEPS jit_kernel)

paddle/fluid/operators/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ math_library(cross_entropy)
4141
math_library(cos_sim_functor)
4242
math_library(depthwise_conv)
4343
math_library(im2col)
44+
math_library(sampler)
4445

4546
if (NOT WIN32) # windows do not support avx functions yet.
4647
math_library(gru_compute DEPS activation_functions math_function)
Lines changed: 91 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -13,58 +13,123 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/operators/math/sampler.h"
16+
#include <iostream>
17+
#include <queue>
18+
#include <utility>
19+
#include <vector>
1620

1721
namespace paddle {
18-
namespace random {
22+
namespace operators {
23+
namespace math {
1924

2025
Sampler::~Sampler() {}
2126

22-
UniformSampler::UniformSampler(int64 range)
23-
: Sampler(range), inv_range_(1.0 / range) {
24-
random_engine_ = std::make_shared<std::mt19937>(seed_);
27+
UniformSampler::UniformSampler(int64_t range, unsigned int seed)
28+
: Sampler(range, seed), inv_range_(1.0 / (range + 1)) {
29+
random_engine_ = std::make_shared<std::mt19937_64>(seed_);
2530
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
2631
}
2732

28-
UniformSampler::UniformSampler(int64 range, unsigned int seed)
29-
: Sampler(range, seed), inv_range_(1.0 / range) {
30-
random_engine_ = std::make_shared<std::mt19937>(seed_);
31-
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
32-
}
33-
34-
int64 UniformSampler::Sample() const { return (*dist_)(*random_engine_); }
33+
int64_t UniformSampler::Sample() const { return (*dist_)(*random_engine_); }
3534

36-
float UniformSampler::Probability(int64 value) const { return inv_range_; }
35+
float UniformSampler::Probability(int64_t value) const { return inv_range_; }
3736

38-
LogUniformSampler::LogUniformSampler(int64 range)
39-
: Sampler(range), log_range_(log(range + 1)) {
40-
random_engine_ = std::make_shared<std::mt19937>(seed_);
41-
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
42-
}
43-
44-
LogUniformSampler::LogUniformSampler(int64 range, unsigned int seed)
37+
LogUniformSampler::LogUniformSampler(int64_t range, unsigned int seed)
4538
: Sampler(range, seed), log_range_(log(range + 1)) {
46-
random_engine_ = std::make_shared<std::mt19937>(seed_);
39+
random_engine_ = std::make_shared<std::mt19937_64>(seed_);
4740
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
4841
}
49-
int64 LogUniformSampler::Sample() const {
42+
43+
int64_t LogUniformSampler::Sample() const {
5044
// Derive a log-uniform sample from a uniform sample by the
5145
// inverse transform sampling method.
5246
// More details:
5347
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler/
54-
const int64 value =
55-
static_cast<int64>(exp((*dist_)(*random_engine_) * log_range_)) - 1;
48+
const int64_t value =
49+
static_cast<int64_t>(exp((*dist_)(*random_engine_) * log_range_)) - 1;
5650
// Mathematically, value should be <= range_, but might not be due to some
5751
// floating point roundoff, so we mod by range_.
5852
return value % range_;
5953
}
6054

61-
float LogUniformSampler::Probability(int64 value) const {
55+
float LogUniformSampler::Probability(int64_t value) const {
6256
// Given f(x) = 1/[(x+1) * log_range_]
6357
// The value's probability is the integral of f(x) from value to (value + 1)
6458
// More details:
6559
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler
6660
return (log((value + 2.0) / (value + 1.0))) / log_range_;
6761
}
6862

69-
} // namespace random
63+
CustomSampler::CustomSampler(int64_t range, const float* probabilities,
64+
unsigned int seed)
65+
: Sampler(range, seed) {
66+
random_engine_ = std::make_shared<std::mt19937_64>(seed_);
67+
real_dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
68+
int_dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
69+
alias_probs_ = std::make_shared<std::vector<float>>(range + 1);
70+
alias_ = std::make_shared<std::vector<int64_t>>(range + 1);
71+
probs_ = std::make_shared<std::vector<float>>(range + 1);
72+
73+
std::queue<std::pair<int64_t, float>> bigs;
74+
std::queue<std::pair<int64_t, float>> littles;
75+
for (int64_t i = 0; i <= range; ++i) {
76+
(*probs_)[i] = probabilities[i];
77+
float normal_prob = probabilities[i] * (range + 1);
78+
if (normal_prob - 1.0 > 1e-4) {
79+
bigs.emplace(i, normal_prob);
80+
} else if (1.0 - normal_prob > 1e-4) {
81+
littles.emplace(i, normal_prob);
82+
} else {
83+
(*alias_probs_)[i] = normal_prob;
84+
(*alias_)[i] = -1;
85+
}
86+
}
87+
88+
while ((!littles.empty()) && (!bigs.empty())) {
89+
auto big = bigs.front();
90+
auto little = littles.front();
91+
bigs.pop();
92+
littles.pop();
93+
(*alias_probs_)[little.first] = little.second;
94+
(*alias_)[little.first] = big.first;
95+
auto big_left = big.second - (1 - little.second);
96+
if (big_left - 1.0 > 1e-4) {
97+
bigs.emplace(big.first, big_left);
98+
} else if (1.0 - big_left > 1e-4) {
99+
littles.emplace(big.first, big_left);
100+
} else {
101+
(*alias_probs_)[big.first] = big_left;
102+
(*alias_)[big.first] = -1;
103+
}
104+
}
105+
106+
if (!littles.empty()) { // littles.second is close to 1.0
107+
auto little = littles.front();
108+
(*alias_probs_)[little.first] = 1.0;
109+
(*alias_)[little.first] = -1;
110+
}
111+
112+
if (!bigs.empty()) { // bigs.second is close to 1.0
113+
auto big = bigs.front();
114+
(*alias_probs_)[big.first] = 1.0;
115+
(*alias_)[big.first] = -1;
116+
}
117+
}
118+
119+
int64_t CustomSampler::Sample() const {
120+
auto index = (*int_dist_)(*random_engine_);
121+
auto p = (*real_dist_)(*random_engine_);
122+
if (p > (*alias_probs_)[index]) {
123+
return (*alias_)[index];
124+
} else {
125+
return index;
126+
}
127+
}
128+
129+
float CustomSampler::Probability(int64_t value) const {
130+
return (*probs_)[value];
131+
}
132+
133+
} // namespace math
134+
} // namespace operators
70135
} // namespace paddle

paddle/fluid/operators/math/sampler.h

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ limitations under the License. */
1616
#include <cstdint>
1717
#include <memory>
1818
#include <random>
19+
#include <vector>
20+
1921
namespace paddle {
2022
namespace operators {
2123
namespace math {
@@ -27,22 +29,22 @@ namespace math {
2729
*/
2830
class Sampler {
2931
public:
30-
explicit Sampler(int64_t range) : range_(range) {
31-
PADDLE_ENFORCE_GT(range, 0);
32-
std::random_device r;
33-
seed_ = r();
34-
}
35-
explicit Sampler(int64_t range, unsigned int seed)
36-
: range_(range), seed_(seed) {
37-
PADDLE_ENFORCE_GT(range, 0);
32+
explicit Sampler(int64_t range, unsigned int seed = 0UL) : range_(range) {
33+
// PADDLE_ENFORCE_GT(range, 0, "Range should be greater than 0.");
34+
if (seed == 0) {
35+
std::random_device r;
36+
seed_ = r();
37+
} else {
38+
seed_ = seed;
39+
}
3840
}
3941
virtual ~Sampler();
4042
// Sample a single value
4143
virtual int64_t Sample() const = 0;
4244
// The probability that a single call to Sample() returns the given value.
4345
virtual float Probability(int64_t value) const = 0;
4446

45-
int64 range() { return range_; }
47+
int64_t range() { return range_; }
4648

4749
protected:
4850
const int64_t range_;
@@ -56,13 +58,11 @@ class Sampler {
5658
*/
5759
class UniformSampler : public Sampler {
5860
public:
59-
explicit UniformSampler(int64_t range);
60-
61-
explicit UniformSampler(int64_t range, unsigned int seed);
61+
explicit UniformSampler(int64_t range, unsigned int seed = 0UL);
6262

6363
~UniformSampler() override {}
6464

65-
int64 Sample() const override;
65+
int64_t Sample() const override;
6666

6767
float Probability(int64_t value) const override;
6868

@@ -79,13 +79,11 @@ class UniformSampler : public Sampler {
7979
*/
8080
class LogUniformSampler : public Sampler {
8181
public:
82-
explicit LogUniformSampler(int64_t range);
83-
84-
explicit LogUniformSampler(int64_t range, unsigned int seed);
82+
explicit LogUniformSampler(int64_t range, unsigned int seed = 0UL);
8583

8684
~LogUniformSampler() override {}
8785

88-
int64 Sample() const override;
86+
int64_t Sample() const override;
8987

9088
float Probability(int64_t value) const override;
9189

@@ -95,6 +93,29 @@ class LogUniformSampler : public Sampler {
9593
std::shared_ptr<std::uniform_real_distribution<>> dist_;
9694
};
9795

96+
/**
97+
* Sample integers from [0, range] according to a custom distribution.
98+
*/
99+
class CustomSampler : public Sampler {
100+
public:
101+
explicit CustomSampler(int64_t range, const float* probabilities,
102+
unsigned int seed = 0UL);
103+
104+
~CustomSampler() override {}
105+
106+
int64_t Sample() const override;
107+
108+
float Probability(int64_t value) const override;
109+
110+
private:
111+
std::shared_ptr<std::vector<float>> alias_probs_;
112+
std::shared_ptr<std::vector<int64_t>> alias_;
113+
std::shared_ptr<std::vector<float>> probs_;
114+
std::shared_ptr<std::mt19937_64> random_engine_;
115+
std::shared_ptr<std::uniform_real_distribution<>> real_dist_;
116+
std::shared_ptr<std::uniform_int_distribution<>> int_dist_;
117+
};
118+
98119
} // namespace math
99120
} // namespace operators
100121
} // namespace paddle

paddle/fluid/operators/nce_op.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class NCEOp : public framework::OperatorWithKernel {
3535

3636
auto x_dims = ctx->GetInputDim("Input");
3737
auto label_dims = ctx->GetInputDim("Label");
38+
auto w_dims = ctx->GetInputDim("Weight");
3839
PADDLE_ENFORCE_EQ(x_dims[0], label_dims[0]);
3940
int num_true_classes = label_dims.size() == 2 ? label_dims[1] : 1;
4041
if (ctx->HasInput("Bias")) {
@@ -98,6 +99,13 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
9899
"each sample. And it is a dispensable input. The default value of "
99100
"sample is 1.")
100101
.AsDispensable();
102+
103+
AddInput(
104+
"CustomDistribution",
105+
"(Tensor) It is used in 'CostumDist' sampler. "
106+
"It is a tensor with shape [num_total_classes]."
107+
"The i-th element is the probsbility of the i-th class being sampled.")
108+
.AsDispensable();
101109
AddOutput("Cost",
102110
"(Tensor) A tensor of shape [batch_size, 1]. Cost of samples.");
103111
AddOutput("SampleLogits",
@@ -121,6 +129,17 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
121129
AddAttr<int>("num_neg_samples",
122130
"The number of negative classes. The default value is 10.")
123131
.SetDefault(10);
132+
133+
AddAttr<int>("sampler",
134+
"(int) Which sampler to be used to sample negative class."
135+
"0: Uniform; 1: LogUniform; 2: CostumDist.")
136+
.SetDefault(0);
137+
138+
AddAttr<int>("seed",
139+
"(int) The seed used in sampler. If it is 0, "
140+
"the sampler will generate a seed randomly.")
141+
.SetDefault(0);
142+
124143
AddAttr<std::vector<int>>("custom_neg_classes",
125144
"This attribute only be used in unitest. Classes "
126145
"in this list wiil be used as negative classes "

0 commit comments

Comments
 (0)