diff --git a/packages/opentelemetry-node/lib/central-config.js b/packages/opentelemetry-node/lib/central-config.js
index 24bf1a4b..18f34e53 100644
--- a/packages/opentelemetry-node/lib/central-config.js
+++ b/packages/opentelemetry-node/lib/central-config.js
@@ -420,6 +420,54 @@ const REMOTE_CONFIG_HANDLERS = [
             return null;
         },
     },
+
+    {
+        keys: ['sampling_rate'],
+        // Applies (or resets) the ratio of `sdkInfo.sampler` from the remote
+        // "sampling_rate" value. Returns `null` on success, or a string
+        // describing why the value was not applied.
+        setter: (config, sdkInfo) => {
+            // No `sdkInfo.sampler` means there is no default sampler to update.
+            if (!sdkInfo.sampler) {
+                return `ignoring "sampling_rate" because non-default sampler in use`;
+            }
+
+            const rawRate = config['sampling_rate'];
+            let valRate;
+            let verb = 'set';
+            switch (typeof rawRate) {
+                case 'undefined':
+                    // Key absent from the remote config: go back to the
+                    // value recorded in `initialConfig`.
+                    valRate = initialConfig.sampling_rate;
+                    verb = 'reset';
+                    break;
+                case 'number':
+                    valRate = rawRate;
+                    break;
+                case 'string':
+                    // `Number('')` and `Number('  ')` are 0, which would
+                    // silently turn sampling off; treat blank as invalid.
+                    valRate = rawRate.trim() === '' ? NaN : Number(rawRate);
+                    if (Number.isNaN(valRate)) {
+                        return `unknown 'sampling_rate' value: "${rawRate}"`;
+                    }
+                    break;
+                default:
+                    return `unknown 'sampling_rate' value type: ${typeof rawRate} (${rawRate})`;
+            }
+
+            // Negated form so that NaN and undefined are rejected as well.
+            if (!(valRate >= 0 && valRate <= 1)) {
+                return `'sampling_rate' value must be between 0 and 1: ${valRate}`;
+            }
+
+            sdkInfo.sampler.setRatio(valRate);
+            log.info(`central-config: ${verb} "sampling_rate" to "${valRate}"`);
+
+            return null;
+        },
+    },
 ];
 
 /**
@@ -500,7 +548,7 @@ function onRemoteConfig(sdkInfo, opampClient, remoteConfig) {
 
     // Report config status.
     if (applyErrs.length > 0) {
-        log.error(
+        log.warn(
             {config, applyErrs},
             'could not apply all remote config settings'
         );
@@ -598,6 +646,7 @@ function setupCentralConfig(sdkInfo) {
         CC_LOGGING_LEVEL_FROM_LUGGITE_LEVEL[
             luggite.nameFromLevel[log.level()] ??
DEFAULT_LOG_LEVEL
         ];
+    initialConfig.sampling_rate = sdkInfo.samplingRate;
     initialConfig.send_traces = !sdkInfo.contextPropagationOnly;
     log.debug({initialConfig}, 'initial central config values');
 
diff --git a/packages/opentelemetry-node/lib/sampler.js b/packages/opentelemetry-node/lib/sampler.js
new file mode 100644
index 00000000..ea8e7697
--- /dev/null
+++ b/packages/opentelemetry-node/lib/sampler.js
@@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+const {
+    createCompositeSampler,
+    createComposableParentThresholdSampler,
+    createComposableTraceIDRatioBasedSampler,
+} = require('@opentelemetry/sampler-composite');
+
+/**
+ * @typedef {import('@opentelemetry/api').Attributes} Attributes
+ * @typedef {import('@opentelemetry/api').Context} Context
+ * @typedef {import('@opentelemetry/api').Link} Link
+ * @typedef {import('@opentelemetry/api').SpanKind} SpanKind
+ * @typedef {import('@opentelemetry/sdk-trace-base').Sampler} Sampler
+ * @typedef {import('@opentelemetry/sdk-trace-base').SamplingResult} SamplingResult
+ */
+
+/**
+ * EDOT's default sampler: a parent-based ratio sampler whose ratio can be
+ * changed at runtime through `setRatio()`.
+ *
+ * @implements {Sampler}
+ */
+class DefaultSampler {
+    /** Sampler in use; replaced whenever `setRatio()` is called. */
+    #current;
+
+    /**
+     * @param {number} [ratio] Initial sampling ratio.
+     */
+    constructor(ratio = 1.0) {
+        this.#current = newSampler(ratio);
+    }
+
+    /**
+     * Passes the arguments through to the sampler in use and returns its result.
+     *
+     * @param {Context} context
+     * @param {string} traceId
+     * @param {string} spanName
+     * @param {SpanKind} spanKind
+     * @param {Attributes} attributes
+     * @param {Link[]} links
+     * @returns {SamplingResult}
+     */
+    shouldSample(context, traceId, spanName, spanKind, attributes, links) {
+        const sampler = this.#current;
+        return sampler.shouldSample(
+            context,
+            traceId,
+            spanName,
+            spanKind,
+            attributes,
+            links
+        );
+    }
+
+    /**
+     * Swaps the sampler in use for a new one built with `ratio`.
+     *
+     * @param {number} ratio
+     */
+    setRatio(ratio) {
+        this.#current = newSampler(ratio);
+    }
+
+    /**
+     * @returns {string} String form of the sampler in use.
+     */
+    toString() {
+        return this.#current.toString();
+    }
+}
+
+/**
+ * Builds a composite sampler around a parent-threshold sampler that wraps a
+ * trace-id-ratio sampler for `ratio`.
+ *
+ * @param {number} ratio A number between 0 and 1 representing the sampling ratio.
+ */
+function newSampler(ratio) {
+    const ratioSampler = createComposableTraceIDRatioBasedSampler(ratio);
+    const parentSampler = createComposableParentThresholdSampler(ratioSampler);
+    return createCompositeSampler(parentSampler);
+}
+
+/**
+ * Creates a default EDOT sampler, which is a parent-based ratio sampler that can have
+ * its ratio updated dynamically by central config.
+ *
+ * @param {number} ratio
+ * @returns {Sampler} A ratio sampler which can have its ratio updated dynamically.
+ */
+function createDefaultSampler(ratio) {
+    return new DefaultSampler(ratio);
+}
+
+module.exports = {
+    createDefaultSampler,
+};
diff --git a/packages/opentelemetry-node/lib/sdk.js b/packages/opentelemetry-node/lib/sdk.js
index 74eeb0fe..835a0c90 100644
--- a/packages/opentelemetry-node/lib/sdk.js
+++ b/packages/opentelemetry-node/lib/sdk.js
@@ -9,6 +9,7 @@ const os = require('os');
 
 const {
     getBooleanFromEnv,
+    getNumberFromEnv,
     getStringFromEnv,
     getStringListFromEnv,
 } = require('@opentelemetry/core');
@@ -45,10 +46,12 @@ const {
     setupDynConfExporters,
     dynConfSpanExporters,
 } = require('./dynconf');
+const {createDefaultSampler} = require('./sampler');
 const DISTRO_VERSION = require('../package.json').version;
 
 /**
  * @typedef {import('@opentelemetry/sdk-node').NodeSDKConfiguration} NodeSDKConfiguration
+ * @typedef {import('@opentelemetry/sdk-trace-base').Sampler} Sampler
  */
 
 /**
@@ -241,6 +244,26 @@ function startNodeSDK(cfg = {}) {
 
     const config = {...defaultConfig, ...cfg};
 
+    /** @type {Sampler} */
+    let sampler = undefined;
+    let samplingRate = 1.0;
+    if (!config.sampler && !getStringFromEnv('OTEL_TRACES_SAMPLER')) {
+        // No sampler set via config or env var, so use our default sampler. Get the arg as a
+        // string first to tell missing from invalid; non-numeric or outside [0, 1] is invalid.
+        if (getStringFromEnv('OTEL_TRACES_SAMPLER_ARG')) {
+            const rateArg = getNumberFromEnv('OTEL_TRACES_SAMPLER_ARG');
+            if (rateArg === undefined || rateArg < 0 || rateArg > 1) {
+                log.warn(
+                    `Invalid OTEL_TRACES_SAMPLER_ARG value: ${process.env.OTEL_TRACES_SAMPLER_ARG}. Using default sampling rate of ${samplingRate}`
+                );
+            } else {
+                samplingRate = rateArg;
+            }
+        }
+        sampler = createDefaultSampler(samplingRate);
+        config.sampler = sampler;
+    }
+
     // Some tricks to get a handle on noop signal providers, to be used for
     // dynamic configuration.
const tracerProviderProxy = new api.ProxyTracerProvider(); @@ -306,6 +329,8 @@ function startNodeSDK(cfg = {}) { noopTracerProvider, // @ts-ignore: Ignore access of private _tracerProvider for now. (TODO) sdkTracerProvider: sdk._tracerProvider, + sampler, + samplingRate, contextPropagationOnly, }); diff --git a/packages/opentelemetry-node/package-lock.json b/packages/opentelemetry-node/package-lock.json index 520188c0..f2bf14bf 100644 --- a/packages/opentelemetry-node/package-lock.json +++ b/packages/opentelemetry-node/package-lock.json @@ -63,6 +63,7 @@ "@opentelemetry/resource-detector-azure": "^0.14.0", "@opentelemetry/resource-detector-container": "^0.7.0", "@opentelemetry/resources": "^2.0.0", + "@opentelemetry/sampler-composite": "^0.206.0", "@opentelemetry/sdk-logs": "^0.206.0", "@opentelemetry/sdk-metrics": "^2.0.0", "@opentelemetry/sdk-node": "^0.206.0", @@ -3669,6 +3670,70 @@ "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, + "node_modules/@opentelemetry/sampler-composite": { + "version": "0.206.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/sampler-composite/-/sampler-composite-0.206.0.tgz", + "integrity": "sha512-EEegHAitmUnaou4Sz8/9dmHquI2lCJGjvbEl42AkgiuqOuUG2MbIB3Ip6SqV6UVJSq6mi8NgERzofLjjK8whMA==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.0.1", + "@opentelemetry/sdk-trace-base": "2.0.1" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/sampler-composite/node_modules/@opentelemetry/core": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.0.1.tgz", + "integrity": "sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": 
">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sampler-composite/node_modules/@opentelemetry/resources": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.0.1.tgz", + "integrity": "sha512-dZOB3R6zvBwDKnHDTB4X1xtMArB/d324VsbiPkX/Yu0Q8T2xceRthoIVFhJdvgVM2QhGVUyX9tzwiNxGtoBJUw==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.0.1", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sampler-composite/node_modules/@opentelemetry/sdk-trace-base": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.0.1.tgz", + "integrity": "sha512-xYLlvk/xdScGx1aEqvxLwf6sXQLXCjk3/1SQT9X9AoN5rXRhkdvIFShuNNmtTEPRBqcsMbS4p/gJLNI2wXaDuQ==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.0.1", + "@opentelemetry/resources": "2.0.1", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, "node_modules/@opentelemetry/sdk-logs": { "version": "0.206.0", "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-logs/-/sdk-logs-0.206.0.tgz", diff --git a/packages/opentelemetry-node/package.json b/packages/opentelemetry-node/package.json index 40c1c52e..9569fd02 100644 --- a/packages/opentelemetry-node/package.json +++ b/packages/opentelemetry-node/package.json @@ -126,6 +126,7 @@ "@opentelemetry/resource-detector-azure": "^0.14.0", "@opentelemetry/resource-detector-container": "^0.7.0", "@opentelemetry/resources": "^2.0.0", + "@opentelemetry/sampler-composite": "^0.206.0", "@opentelemetry/sdk-logs": "^0.206.0", "@opentelemetry/sdk-metrics": "^2.0.0", "@opentelemetry/sdk-node": "^0.206.0", diff --git 
a/packages/opentelemetry-node/test/OTEL_TRACES_SAMPLER.js b/packages/opentelemetry-node/test/OTEL_TRACES_SAMPLER.js
new file mode 100644
index 00000000..bb2d3dfc
--- /dev/null
+++ b/packages/opentelemetry-node/test/OTEL_TRACES_SAMPLER.js
@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+const {test} = require('tape');
+const {runTestFixtures} = require('./testutils');
+
+/**
+ * Builds a fixture that runs `use-http-get.js` with the
+ * `@elastic/opentelemetry-node` import plus `extraEnv`, and expects exactly
+ * `numSpans` spans.
+ *
+ * @param {string} name
+ * @param {Record<string, string>} extraEnv
+ * @param {number} numSpans
+ * @returns {import('./testutils').TestFixture}
+ */
+function samplerFixture(name, extraEnv, numSpans) {
+    return {
+        name,
+        args: ['./fixtures/use-http-get.js'],
+        cwd: __dirname,
+        env: {
+            NODE_OPTIONS: '--import=@elastic/opentelemetry-node',
+            ...extraEnv,
+        },
+        checkTelemetry: (t, col) => {
+            t.equal(col.sortedSpans.length, numSpans);
+        },
+    };
+}
+
+/** @type {import('./testutils').TestFixture[]} */
+const testFixtures = [
+    samplerFixture(
+        'OTEL_TRACES_SAMPLER unset (default sampling of 100%)',
+        {},
+        1
+    ),
+    samplerFixture(
+        'OTEL_TRACES_SAMPLER unset, OTEL_TRACES_SAMPLER_ARG=0 (no sampling)',
+        {OTEL_TRACES_SAMPLER_ARG: '0'},
+        0
+    ),
+    samplerFixture(
+        'OTEL_TRACES_SAMPLER=always_off, (no sampling)',
+        {OTEL_TRACES_SAMPLER: 'always_off'},
+        0
+    ),
+];
+
+// ----- main line -----
+
+test('OTEL_TRACES_SAMPLER', (suite) => {
+    runTestFixtures(suite, testFixtures);
+    suite.end();
+});
diff --git a/packages/opentelemetry-node/test/central-config.test.js b/packages/opentelemetry-node/test/central-config.test.js
index 8ce8bdde..1c418ea3 100644
--- a/packages/opentelemetry-node/test/central-config.test.js
+++ b/packages/opentelemetry-node/test/central-config.test.js
@@ -770,6 +770,165 @@ test('central-config', (suite) => {
             },
         },
 
+        {
+            name: 'central-config-gen-telemetry.js sampling_rate=0.0',
+            args:
['./fixtures/central-config-gen-telemetry.js'], + cwd: __dirname, + env: () => { + return { + NODE_OPTIONS: '--import @elastic/opentelemetry-node', + ELASTIC_OTEL_NODE_ENABLE_LOG_SENDING: 'true', + // Skip cloud resource detectors to avoid delay and noise. + OTEL_NODE_RESOURCE_DETECTORS: + 'env,host,os,process,serviceinstance,container', + ELASTIC_OTEL_OPAMP_ENDPOINT: opampServer.endpoint, + ELASTIC_OTEL_EXPERIMENTAL_OPAMP_HEARTBEAT_INTERVAL: '300', + ELASTIC_OTEL_TEST_OPAMP_CLIENT_DIAG_ENABLED: 'true', + // Set a short metric export interval to allow the + // fixture script to wait for an interval after receiving + // central config before proceeding. + OTEL_METRIC_EXPORT_INTERVAL: '500', + OTEL_METRIC_EXPORT_TIMEOUT: '450', + }; + }, + before: () => { + const config = { + sampling_rate: '0.0', + }; + opampServer.setAgentConfigMap({ + configMap: { + elastic: { + body: Buffer.from(JSON.stringify(config), 'utf8'), + contentType: 'application/json', + }, + }, + }); + }, + after: () => { + opampServer.setAgentConfigMap({configMap: {}}); + }, + // verbose: true, + checkTelemetry: (t, col) => { + t.equal(col.sortedSpans.length, 0, 'no spans'); + }, + }, + + { + name: 'central-config-gen-telemetry.js sampling_rate=0.0 non-default sampler', + args: ['./fixtures/central-config-gen-telemetry.js'], + cwd: __dirname, + env: () => { + return { + NODE_OPTIONS: '--import @elastic/opentelemetry-node', + ELASTIC_OTEL_NODE_ENABLE_LOG_SENDING: 'true', + // Skip cloud resource detectors to avoid delay and noise. + OTEL_NODE_RESOURCE_DETECTORS: + 'env,host,os,process,serviceinstance,container', + ELASTIC_OTEL_OPAMP_ENDPOINT: opampServer.endpoint, + ELASTIC_OTEL_EXPERIMENTAL_OPAMP_HEARTBEAT_INTERVAL: '300', + ELASTIC_OTEL_TEST_OPAMP_CLIENT_DIAG_ENABLED: 'true', + // Set a short metric export interval to allow the + // fixture script to wait for an interval after receiving + // central config before proceeding. 
+ OTEL_METRIC_EXPORT_INTERVAL: '500', + OTEL_METRIC_EXPORT_TIMEOUT: '450', + OTEL_TRACES_SAMPLER: 'traceidratio', + OTEL_TRACES_SAMPLER_ARG: '1.0', + }; + }, + before: () => { + const config = { + sampling_rate: '0.0', + }; + opampServer.setAgentConfigMap({ + configMap: { + elastic: { + body: Buffer.from(JSON.stringify(config), 'utf8'), + contentType: 'application/json', + }, + }, + }); + }, + after: () => { + opampServer.setAgentConfigMap({configMap: {}}); + }, + // verbose: true, + checkTelemetry: (t, col, stdout) => { + t.equal(col.sortedSpans.length, 3, 'got all the spans'); + const recs = stdout + .split(/\r?\n/g) + .filter((ln) => ln.startsWith('{')) + .map((ln) => JSON.parse(ln)); + const rec = findObjInArray( + recs, + 'msg', + 'could not apply all remote config settings' + ); + t.ok(rec); + t.equal( + rec.applyErrs[0], + 'ignoring "sampling_rate" because non-default sampler in use' + ); + }, + }, + + { + name: 'central-config-gen-telemetry.js sampling_rate=-1.0', + args: ['./fixtures/central-config-gen-telemetry.js'], + cwd: __dirname, + env: () => { + return { + NODE_OPTIONS: '--import @elastic/opentelemetry-node', + ELASTIC_OTEL_NODE_ENABLE_LOG_SENDING: 'true', + // Skip cloud resource detectors to avoid delay and noise. + OTEL_NODE_RESOURCE_DETECTORS: + 'env,host,os,process,serviceinstance,container', + ELASTIC_OTEL_OPAMP_ENDPOINT: opampServer.endpoint, + ELASTIC_OTEL_EXPERIMENTAL_OPAMP_HEARTBEAT_INTERVAL: '300', + ELASTIC_OTEL_TEST_OPAMP_CLIENT_DIAG_ENABLED: 'true', + // Set a short metric export interval to allow the + // fixture script to wait for an interval after receiving + // central config before proceeding. 
+ OTEL_METRIC_EXPORT_INTERVAL: '500', + OTEL_METRIC_EXPORT_TIMEOUT: '450', + }; + }, + before: () => { + const config = { + sampling_rate: '-1.0', + }; + opampServer.setAgentConfigMap({ + configMap: { + elastic: { + body: Buffer.from(JSON.stringify(config), 'utf8'), + contentType: 'application/json', + }, + }, + }); + }, + after: () => { + opampServer.setAgentConfigMap({configMap: {}}); + }, + // verbose: true, + checkTelemetry: (t, col, stdout) => { + t.equal(col.sortedSpans.length, 3, 'got all the spans'); + const recs = stdout + .split(/\r?\n/g) + .filter((ln) => ln.startsWith('{')) + .map((ln) => JSON.parse(ln)); + const rec = findObjInArray( + recs, + 'msg', + 'could not apply all remote config settings' + ); + t.ok(rec); + t.equal( + rec.applyErrs[0], + "'sampling_rate' value must be between 0 and 1: -1" + ); + }, + }, + // TODO: Test unpatching cases with ESM. Does that work? ]; diff --git a/packages/opentelemetry-node/types/sampler.d.ts b/packages/opentelemetry-node/types/sampler.d.ts new file mode 100644 index 00000000..6a200f6e --- /dev/null +++ b/packages/opentelemetry-node/types/sampler.d.ts @@ -0,0 +1,14 @@ +export type Attributes = import('@opentelemetry/api').Attributes; +export type Context = import('@opentelemetry/api').Context; +export type Link = import('@opentelemetry/api').Link; +export type SpanKind = import('@opentelemetry/api').SpanKind; +export type Sampler = import('@opentelemetry/sdk-trace-base').Sampler; +export type SamplingResult = import('@opentelemetry/sdk-trace-base').SamplingResult; +/** + * Creates a default EDOT sampler, which is a parent-based ratio sampler that can have + * its ratio updated dynamically by central config. + * + * @param {number} ratio + * @returns {Sampler} A ratio sampler which can have its ratio updated dynamically. 
+ */ +export function createDefaultSampler(ratio: number): Sampler; diff --git a/packages/opentelemetry-node/types/sdk.d.ts b/packages/opentelemetry-node/types/sdk.d.ts index 34dcef12..9f26274e 100644 --- a/packages/opentelemetry-node/types/sdk.d.ts +++ b/packages/opentelemetry-node/types/sdk.d.ts @@ -1,4 +1,5 @@ export type NodeSDKConfiguration = import('@opentelemetry/sdk-node').NodeSDKConfiguration; +export type Sampler = import('@opentelemetry/sdk-trace-base').Sampler; export type ElasticNodeSDKOptions = { /** * - Whether to setup handlers diff --git a/scripts/gen-notice.sh b/scripts/gen-notice.sh index a95f1e6b..58fa2a95 100755 --- a/scripts/gen-notice.sh +++ b/scripts/gen-notice.sh @@ -87,10 +87,12 @@ npm ls --omit=dev --all --parseable \ "tr46": "license.MIT.txt", "@bufbuild/protobuf": "license.apache2.txt", "safe-json-stringify": "license.MIT.txt", + // Releases after https://github.com/open-telemetry/opentelemetry-js/pull/6002 + // will have a LICENSE file. + "@opentelemetry/sampler-composite": "license.apache2.txt", } const licTypeFromPkgName = { - // instr-openai will get the license field in https://github.com/elastic/elastic-otel-node/pull/1015 - "@opentelemetry/instrumentation-openai": "Apache-2.0", + // Packages that have a license, but no "license" entry in package.json. } const allowNoLicFile = [ "binary-search" // CC is a public domain dedication, no need for license text.