Skip to content

Commit 32cc11e

Browse files
Merge pull request #5945 from wanghaoshuang/sampler
Add math function for sampling integers
2 parents eaa8d68 + a123c16 commit 32cc11e

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

paddle/operators/math/sampler.cc

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "sampler.h"
16+
17+
namespace paddle {
18+
namespace random {
19+
20+
Sampler::~Sampler() {}
21+
22+
UniformSampler::UniformSampler(int64 range)
23+
: Sampler(range), inv_range_(1.0 / range) {
24+
random_engine_ = std::make_shared<std::mt19937>(seed_);
25+
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
26+
}
27+
28+
UniformSampler::UniformSampler(int64 range, unsigned int seed)
29+
: Sampler(range, seed), inv_range_(1.0 / range) {
30+
random_engine_ = std::make_shared<std::mt19937>(seed_);
31+
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
32+
}
33+
34+
int64 UniformSampler::Sample() const { return (*dist_)(*random_engine_); }
35+
36+
float UniformSampler::Probability(int64 value) const { return inv_range_; }
37+
38+
LogUniformSampler::LogUniformSampler(int64 range)
39+
: Sampler(range), log_range_(log(range + 1)) {
40+
random_engine_ = std::make_shared<std::mt19937>(seed_);
41+
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
42+
}
43+
44+
LogUniformSampler::LogUniformSampler(int64 range, unsigned int seed)
45+
: Sampler(range, seed), log_range_(log(range + 1)) {
46+
random_engine_ = std::make_shared<std::mt19937>(seed_);
47+
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
48+
}
49+
int64 LogUniformSampler::Sample() const {
50+
// Got Log Uniform distribution from uniform distribution by
51+
// inverse_transform_sampling method
52+
// More details:
53+
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler/
54+
const int64 value =
55+
static_cast<int64>(exp((*dist_)(*random_engine_) * log_range_)) - 1;
56+
// Mathematically, value should be <= range_, but might not be due to some
57+
// floating point roundoff, so we mod by range_.
58+
return value % range_;
59+
}
60+
61+
float LogUniformSampler::Probability(int64 value) const {
62+
// Given f(x) = 1/[(x+1) * log_range_]
63+
// The value's probability is integral of f(x) from value to (value + 1)
64+
// More details:
65+
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler
66+
return (log((value + 2.0) / (value + 1.0))) / log_range_;
67+
}
68+
69+
} // namespace random
70+
} // namespace paddle

paddle/operators/math/sampler.h

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
#include <memory>
17+
#include <random>
18+
// Integer type used for sample values and ranges throughout this header.
// NOTE(review): `long` is not 64 bits wide on every platform (e.g. LLP64
// targets) -- confirm that all supported builds are LP64.
using int64 = long;
19+
namespace paddle {
20+
namespace operators {
21+
namespace math {
22+
23+
// TODO(wanghaoshuang): Support for GPU
24+
25+
/**
26+
* Sample integers from [0, range).
27+
*/
28+
class Sampler {
29+
public:
30+
explicit Sampler(int64 range) : range_(range) {
31+
PADDLE_ENFORCE_GT(range, 0);
32+
std::random_device r;
33+
seed_ = r();
34+
}
35+
explicit Sampler(int64 range, unsigned int seed)
36+
: range_(range), seed_(seed) {
37+
PADDLE_ENFORCE_GT(range, 0);
38+
}
39+
virtual ~Sampler();
40+
// Sample a single value
41+
virtual int64 Sample() const = 0;
42+
// The probability that a single call to Sample() returns the given value.
43+
virtual float Probability(int64 value) const = 0;
44+
45+
int64 range() { return range_; };
46+
47+
protected:
48+
const int64 range_;
49+
unsigned int seed_;
50+
};
51+
52+
/**
53+
* Sample integers from [0, range).
54+
* And the distribution function is:
55+
* P(x) = 1 / range
56+
*/
57+
class UniformSampler : public Sampler {
58+
public:
59+
explicit UniformSampler(int64 range);
60+
61+
explicit UniformSampler(int64 range, unsigned int seed);
62+
63+
~UniformSampler() override {}
64+
65+
int64 Sample() const override;
66+
67+
float Probability(int64 value) const override;
68+
69+
private:
70+
const float inv_range_;
71+
std::shared_ptr<std::mt19937_64> random_engine_;
72+
std::shared_ptr<std::uniform_int_distribution<>> dist_;
73+
};
74+
75+
/**
76+
* Sample integers from [0, range).
77+
* And the distribution function is:
78+
* P(x) = (1/ln(range+1)) * ln(1 + 1/(x + 1))
79+
*/
80+
class LogUniformSampler : public Sampler {
81+
public:
82+
explicit LogUniformSampler(int64 range);
83+
84+
explicit LogUniformSampler(int64 range, unsigned int seed);
85+
86+
~LogUniformSampler() override {}
87+
88+
int64 Sample() const override;
89+
90+
float Probability(int64 value) const override;
91+
92+
private:
93+
const float log_range_;
94+
std::shared_ptr<std::mt19937_64> random_engine_;
95+
std::shared_ptr<std::uniform_real_distribution<>> dist_;
96+
};
97+
98+
}  // namespace math
99+
} // namespace operators
100+
} // namespace paddle

0 commit comments

Comments
 (0)