Skip to content

Commit 3835198

Browse files
authored
Update ratio based sampler threshold algorithm (#1391)
See OTEP 235, Threshold = (1 - Probability) * 2**56.
1 parent 23c3cda commit 3835198

File tree

3 files changed

+122
-23
lines changed

3 files changed

+122
-23
lines changed

src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,20 @@
44

55
namespace OpenTelemetry\SDK\Trace\Sampler;
66

7+
use function assert;
8+
use function bin2hex;
79
use InvalidArgumentException;
810
use OpenTelemetry\Context\ContextInterface;
911
use OpenTelemetry\SDK\Common\Attribute\AttributesInterface;
1012
use OpenTelemetry\SDK\Trace\SamplerInterface;
1113
use OpenTelemetry\SDK\Trace\SamplingResult;
1214
use OpenTelemetry\SDK\Trace\Span;
15+
use function pack;
16+
use function rtrim;
17+
use function sprintf;
18+
use function substr;
19+
use function substr_compare;
20+
use function unpack;
1321

1422
/**
1523
* This implementation of the SamplerInterface records with given probability.
@@ -22,22 +30,22 @@
2230
class TraceIdRatioBasedSampler implements SamplerInterface
2331
{
2432
private readonly float $probability;
33+
private readonly string $tv;
2534

2635
/**
2736
* @param float $probability Probability float value between 0.0 and 1.0.
37+
* @param int<1, 14> $precision threshold precision in hexadecimal digits
2838
*/
29-
public function __construct(float $probability)
39+
public function __construct(float $probability, int $precision = 4)
3040
{
31-
if ($probability < 0.0 || $probability > 1.0) {
41+
if (!($probability >= 0 && $probability <= 1)) {
3242
throw new InvalidArgumentException('probability should be be between 0.0 and 1.0.');
3343
}
44+
3445
$this->probability = $probability;
46+
$this->tv = rtrim(bin2hex(substr(pack('J', self::computeTValue($probability, $precision, 4)), 1)), '0') ?: '0';
3547
}
3648

37-
/**
38-
* Returns `SamplingResult` based on probability. Respects the parent `SampleFlag`
39-
* {@inheritdoc}
40-
*/
4149
public function shouldSample(
4250
ContextInterface $parentContext,
4351
string $traceId,
@@ -46,22 +54,62 @@ public function shouldSample(
4654
AttributesInterface $attributes,
4755
array $links,
4856
): SamplingResult {
49-
// TODO: Add config to adjust which spans get sampled (only default from specification is implemented)
50-
$parentSpan = Span::fromContext($parentContext);
51-
$parentSpanContext = $parentSpan->getContext();
52-
$traceState = $parentSpanContext->getTraceState();
57+
$traceState = Span::fromContext($parentContext)->getContext()->getTraceState();
5358

54-
/**
55-
* Since php can only store up to 63 bit positive integers
56-
*/
57-
$traceIdLimit = (1 << 60) - 1;
58-
$lowerOrderBytes = hexdec(substr($traceId, strlen($traceId) - 15, 15));
59-
$traceIdCondition = $lowerOrderBytes < round($this->probability * $traceIdLimit);
60-
$decision = $traceIdCondition ? SamplingResult::RECORD_AND_SAMPLE : SamplingResult::DROP;
59+
$decision = $this->probability >= 2 ** -56 && substr_compare($traceId, $this->tv, -14) >= 0
60+
? SamplingResult::RECORD_AND_SAMPLE
61+
: SamplingResult::DROP;
6162

6263
return new SamplingResult($decision, [], $traceState);
6364
}
6465

66+
/**
67+
* Computes the 56-bit rejection threshold (T-value) for a given probability.
68+
*
69+
* The T-value is computed as `2**56*(1-$probability)` with a precision of
70+
* `2**-($wordSize*⌈-log2($probability)/$wordSize+$precision-1⌉)`.
71+
*
72+
* Values below `2**-56` will return `0`.
73+
*
74+
* ```
75+
* 1/3 w/ precision=3, wordSize=4
76+
* => 1 - 1/3
77+
* => 2/3
78+
* => 2730.666../4096
79+
* => 2731/4096
80+
* => 0xaab
81+
* ```
82+
*
83+
* Converting the result into the `th` hexadecimal value:
84+
* ```
85+
* $th = rtrim(bin2hex(substr(pack('J', $t), 1)), '0') ?: '0';
86+
* ```
87+
*
88+
* @param float $probability sampling probability, must be between 0 and 1
89+
* @param int $precision precision in words
90+
* @param int $wordSize word size to use, must be a power of two
91+
* @return int 56bit T-value
92+
*
93+
* @internal
94+
*/
95+
public static function computeTValue(float $probability, int $precision, int $wordSize = 1): int
96+
{
97+
assert($probability >= 0 && $probability <= 1); // preconditions are assertions only, so they are skipped when assertions are disabled
98+
assert($precision >= 1);
99+
assert($wordSize >= 1 && ($wordSize & $wordSize - 1) === 0); // power of two: "-" binds tighter than "&", so this tests $wordSize & ($wordSize - 1)
100+
101+
$b = unpack('J', pack('E', $probability))[1]; // bit pattern of the float: written as a big-endian double ('E'), read back as a big-endian 64-bit integer ('J'); assumes 64-bit IEEE 754 doubles
102+
$e = $b >> 52 & (1 << 11) - 1; // 11-bit biased exponent field
103+
$f = $b & (1 << 52) - 1 | ($e ? 1 << 52 : 0); // 52-bit fraction field, plus the implicit leading one (bit 52) when the exponent field is non-zero
104+
105+
// 56+1bit for rounding: work with 57 fractional bits, i.e. the 56 bits of the T-value plus one extra rounding bit
106+
$s = $e - 1023 - 52 + 57; // shift that scales $f to $probability * 2**57 (1023 = exponent bias, 52 = fraction width)
107+
$t = (1 << 57) - ($s < 0 ? $f >> -$s : $f << $s); // (1 - $probability) in the 57-bit scale; bits shifted out to the right are discarded
108+
$m = -1 << 56 >> (-($e - 1023 + 1) + $precision * $wordSize & -$wordSize); // mask of the bits to keep; "& -$wordSize" rounds the shift count down to a multiple of $wordSize
109+
110+
return $t - $m >> 1 & $m; // evaluated as (($t - $m) >> 1) & $m: add the mask's lowest set bit, drop the rounding bit, clear the bits below the kept precision
111+
}
112+
65113
public function getDescription(): string
66114
{
67115
return sprintf('%s{%.6F}', 'TraceIdRatioBasedSampler', $this->probability);

tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public function test_failing_trace_id_ratio_based_sampler_decision(): void
5151
$sampler = new TraceIdRatioBasedSampler(0.99);
5252
$decision = $sampler->shouldSample(
5353
Context::getRoot(),
54-
'4bf92f3577b34da6afffffffffffffff',
54+
'4bf92f3577b34da6a000000000000000',
5555
'test.opentelemetry.io',
5656
API\SpanKind::KIND_INTERNAL,
5757
Attributes::create([]),
@@ -65,7 +65,7 @@ public function test_passing_trace_id_ratio_based_sampler_decision(): void
6565
$sampler = new TraceIdRatioBasedSampler(0.01);
6666
$decision = $sampler->shouldSample(
6767
Context::getRoot(),
68-
'4bf92f3577b34da6a000000000000000',
68+
'4bf92f3577b34da6afffffffffffffff',
6969
'test.opentelemetry.io',
7070
API\SpanKind::KIND_INTERNAL,
7171
Attributes::create([]),

tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,81 @@
44

55
namespace OpenTelemetry\Tests\SDK\Unit\Trace\Sampler;
66

7+
use function bin2hex;
78
use InvalidArgumentException;
89
use OpenTelemetry\API\Trace as API;
910
use OpenTelemetry\Context\Context;
1011
use OpenTelemetry\SDK\Common\Attribute\Attributes;
1112
use OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler;
1213
use OpenTelemetry\SDK\Trace\SamplingResult;
14+
use function pack;
1315
use PHPUnit\Framework\Attributes\CoversClass;
1416
use PHPUnit\Framework\Attributes\DataProvider;
1517
use PHPUnit\Framework\TestCase;
18+
use function rtrim;
19+
use function substr;
1620

1721
#[CoversClass(TraceIdRatioBasedSampler::class)]
1822
class TraceIdRatioBasedSamplerTest extends TestCase
1923
{
20-
public function test_should_sample(): void
24+
#[DataProvider('shouldSampleProvider')]
25+
public function test_should_sample(string $traceId, float $probability, int $result): void
2126
{
22-
$sampler = new TraceIdRatioBasedSampler(1.0);
27+
$sampler = new TraceIdRatioBasedSampler($probability);
2328
$decision = $sampler->shouldSample(
2429
Context::getRoot(),
25-
'4bf92f3577b34da6a3ce929d0e0e4736',
30+
$traceId,
2631
'test.opentelemetry.io',
2732
API\SpanKind::KIND_INTERNAL,
2833
Attributes::create([]),
2934
[],
3035
);
31-
$this->assertEquals(SamplingResult::RECORD_AND_SAMPLE, $decision->getDecision());
36+
$this->assertEquals($result, $decision->getDecision());
37+
}
38+
39+
public static function shouldSampleProvider(): iterable // each dataset: [trace id, sampling probability, expected SamplingResult decision]
40+
{
41+
yield 'otep-0235' => ['123456789123456789d29d6a7215ced0', 0.25, SamplingResult::RECORD_AND_SAMPLE];
42+
43+
yield 'tv=0' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1.0, SamplingResult::RECORD_AND_SAMPLE];
44+
yield 'tv=8' => ['4bf92f3577b34da6a3ce929d0e0e4736', 0.5, SamplingResult::RECORD_AND_SAMPLE];
45+
yield 'tv=cccd' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1 / 5, SamplingResult::RECORD_AND_SAMPLE];
46+
yield 'tv=d' => ['4bf92f3577b34da6a3ce929d0e0e4736', 3 / 16, SamplingResult::DROP];
47+
48+
yield ['4bf92f3577b34da6a380000000000000', 0.5, SamplingResult::RECORD_AND_SAMPLE]; // the unnamed datasets vary only the last 14 hex digits of the trace id to probe the sampled/dropped boundary
49+
yield ['4bf92f3577b34da6a37fffffffffffff', 0.5, SamplingResult::DROP];
50+
yield ['4bf92f3577b34da6a3f5560000000000', 1 / 24, SamplingResult::RECORD_AND_SAMPLE];
51+
yield ['4bf92f3577b34da6a3f554ffffffffff', 1 / 24, SamplingResult::DROP];
52+
yield ['4bf92f3577b34da6a3fffffffffffff0', 2 ** -52, SamplingResult::RECORD_AND_SAMPLE];
53+
yield ['4bf92f3577b34da6a3ffffffffffffef', 2 ** -52, SamplingResult::DROP];
54+
yield ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -56, SamplingResult::RECORD_AND_SAMPLE];
55+
yield ['4bf92f3577b34da6a3fffffffffffffe', 2 ** -56, SamplingResult::DROP];
56+
yield ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -57, SamplingResult::DROP]; // same trace id as the sampled 2 ** -56 case, but dropped at half that probability
57+
}
58+
59+
#[DataProvider('computeTValueProvider')]
60+
public function test_compute_t_value(string $expected, float $probability, int $precision): void
61+
{
62+
$tv = TraceIdRatioBasedSampler::computeTValue($probability, $precision, 4); // word size 4 bits, so $precision counts hexadecimal digits
63+
$this->assertSame($expected, rtrim(bin2hex(substr(pack('J', $tv), 1)), '0') ?: '0'); // skip the top byte of the big-endian 64-bit value, hex-encode the remaining 56 bits, strip trailing zeros ('0' when nothing is left)
64+
}
65+
66+
public static function computeTValueProvider(): iterable // each dataset: [expected hex threshold string, probability, precision]
67+
{
68+
// see https://github.com/open-telemetry/opentelemetry-specification/pull/4166
69+
yield from [['0', 1, 3], ['0', 1, 4], ['0', 1, 5]];
70+
yield from [['8', 1/2, 3], ['8', 1/2, 4], ['8', 1/2, 5]];
71+
yield from [['aab', 1/3, 3], ['aaab', 1/3, 4], ['aaaab', 1/3, 5]];
72+
yield from [['c', 1/4, 3], ['c', 1/4, 4], ['c', 1/4, 5]];
73+
yield from [['ccd', 1/5, 3], ['cccd', 1/5, 4], ['ccccd', 1/5, 5]];
74+
yield from [['e', 1/8, 3], ['e', 1/8, 4], ['e', 1/8, 5]];
75+
yield from [['e66', 1/10, 3], ['e666', 1/10, 4], ['e6666', 1/10, 5]];
76+
yield from [['f', 1/16, 3], ['f', 1/16, 4], ['f', 1/16, 5]];
77+
yield from [['fd71', 1/100, 3], ['fd70a', 1/100, 4], ['fd70a4', 1/100, 5]];
78+
yield from [['ffbe7', 1/1000, 3], ['ffbe77', 1/1000, 4], ['ffbe76d', 1/1000, 5]];
79+
yield from [['fff972', 1/10000, 3], ['fff9724', 1/10000, 4], ['fff97247', 1/10000, 5]];
80+
yield from [['ffff584', 1/100000, 3], ['ffff583a', 1/100000, 4], ['ffff583a5', 1/100000, 5]];
81+
yield from [['ffffef4', 1/1000000, 3], ['ffffef39', 1/1000000, 4], ['ffffef391', 1/1000000, 5]];
3282
}
3383

3484
#[DataProvider('invalidProbabilityProvider')]
@@ -43,6 +93,7 @@ public static function invalidProbabilityProvider(): array
4393
return [
4494
'negative' => [-0.05],
4595
'greater than one' => [1.5],
96+
'NaN' => [NAN],
4697
];
4798
}
4899

0 commit comments

Comments
 (0)