diff --git a/README.md b/README.md index 4394210..bd0f5b4 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ Where `options` is an object and can contain the following: * See more details on setting your site at: https://docs.datadoghq.com/getting_started/site/#access-the-datadog-site * You can also set this via the `DATADOG_SITE` or `DD_SITE` environment variable. + * Ignored if you set the `reporter` option. * `apiKey`: Sets the Datadog API key. (optional) * It's usually best to keep this in an environment variable. Datadog-metrics looks for the API key in the `DATADOG_API_KEY` or @@ -129,6 +130,7 @@ Where `options` is an object and can contain the following: is required to send metrics. * Make sure not to confuse this with your _application_ key! For more details, see: https://docs.datadoghq.com/account_management/api-app-keys/ + * Ignored if you set the `reporter` option. * `appKey`: ⚠️ Deprecated. This does nothing and will be removed in an upcoming release. @@ -145,6 +147,14 @@ Where `options` is an object and can contain the following: same properties as the options object on the `histogram()` method. Options specified when calling the method are layered on top of this object. (optional) +* `retries`: How many times to retry failed metric submissions to Datadog’s API. + * Defaults to `2`. + * Ignored if you set the `reporter` option. +* `retryBackoff`: How long to wait before retrying a failed Datadog API call. + Subsequent retries multiply this delay by 2^(retry count). For example, if + this is set to `1`, retries will happen after 1, then 2, then 4 seconds. + * Defaults to `1`. + * Ignored if you set the `reporter` option. * `reporter`: An object that actually sends the buffered metrics. (optional) * There are two built-in reporters you can use: 1. `reporters.DatadogReporter` sends metrics to Datadog’s API, and is @@ -330,17 +340,23 @@ Contributions are always welcome! 
For more info on how to contribute or develop **Breaking Changes:** - TBD + * The `DatadogReporter` constructor now takes an options object instead of positional arguments. Using this constructor directly is pretty rare, so this likely doesn’t affect you! **New Features:** - * Asynchronous actions now use promises instead of callbacks. In places where `onSuccess` and `onError` callbacks were used, they are now deprecated. Instead, those methods return promises (callbacks still work, but support will be removed in a future release). This affects: + * Promises: asynchronous actions now use promises instead of callbacks. In places where `onSuccess` and `onError` callbacks were used, they are now deprecated. Instead, those methods return promises (callbacks still work, but support will be removed in a future release). This affects: * The `flush()` method now returns a promise. * The `report(series)` method on any custom reporters should now return a promise. For now, datadog-metrics will use the old callback-based behavior if the method signature has callbacks listed after `series` argument. + * Retries: flushes to Datadog’s API are now retried automatically. This can help you work around intermittent network issues or rate limits. To adjust retries, use the `retries` and `retryBackoff` options. + * Environment variables can now be prefixed with *either* `DATADOG_` or `DD_` (previously, only `DATADOG_` worked) in order to match configuration with the Datadog agent. For example, you can set your API key via `DATADOG_API_KEY` or `DD_API_KEY`. + **Deprecations:** + + * The `appKey` option is no longer supported. Application keys (as opposed to API keys) are not actually needed for sending metrics or distributions to the Datadog API. Including it in your configuration adds no benefits, but risks exposing a sensitive credential. + **Bug Fixes:** * Support setting the `site` option via the `DATADOG_SITE` environment variable. 
The `apiHost` option was renamed to `site` in v0.11.0, but the `DATADOG_API_HOST` environment variable was accidentally left as-is. The old environment variable name is now deprecated, and will be removed at the same time as the `apiHost` option is removed. @@ -349,8 +365,6 @@ Contributions are always welcome! For more info on how to contribute or develop * Buffer metrics using `Map` instead of a plain object. - * Deprecated the `appKey` option. Application keys (as opposed to API keys) are not actually needed for sending metrics or distributions to the Datadog API. Including it in your configuration adds no benefits, but risks exposing a sensitive credential. - [View diff](https://github.com/dbader/node-datadog-metrics/compare/v0.11.4...main) * 0.11.4 (2024-11-10) diff --git a/lib/loggers.js b/lib/loggers.js index 05e373b..c6ad360 100644 --- a/lib/loggers.js +++ b/lib/loggers.js @@ -39,13 +39,15 @@ const Distribution = require('./metrics').Distribution; /** * @typedef {object} BufferedMetricsLoggerOptions - * @property {string} [apiKey] Datadog API key + * @property {string} [apiKey] Datadog API key. Ignored if you set the + * `reporter` option. * @property {string} [appKey] DEPRECATED: App keys aren't actually used for * metrics and are no longer supported. * @property {string} [host] Default host for all reported metrics * @property {string} [prefix] Default key prefix for all metrics * @property {string} [site] Sets the Datadog "site", or server where metrics - * are sent. For details and options, see: + * are sent. Ignored if you set the `reporter` option. + * For details and options, see: * https://docs.datadoghq.com/getting_started/site/#access-the-datadog-site * @property {string} [apiHost] DEPRECATED: Please use `site` instead. * @property {number} [flushIntervalSeconds] How often to send metrics to @@ -66,6 +68,11 @@ const Distribution = require('./metrics').Distribution; * metrics between flushes. 
* @property {ReporterType} [reporter] An object that actually sends the * buffered metrics. + * @property {number} [retries] How many times to retry failed attempts to send + * metrics to Datadog's API. Ignored if you set the `reporter` option. + * @property {number} [retryBackoff] How many seconds to wait before retrying a + * failed API request. Subsequent retries will multiply this delay. + * Ignored if you set the `reporter` option. */ /** @@ -99,7 +106,12 @@ class BufferedMetricsLogger { /** @private */ this.aggregator = opts.aggregator || new Aggregator(opts.defaultTags); /** @private @type {ReporterType} */ - this.reporter = opts.reporter || new DatadogReporter(opts.apiKey, opts.site); + this.reporter = opts.reporter || new DatadogReporter({ + apiKey: opts.apiKey, + site: opts.site, + retries: opts.retries, + retryBackoff: opts.retryBackoff + }); /** @private */ this.host = opts.host; /** @private */ diff --git a/lib/reporters.js b/lib/reporters.js index e5d51ac..f74822e 100644 --- a/lib/reporters.js +++ b/lib/reporters.js @@ -3,6 +3,18 @@ const datadogApiClient = require('@datadog/datadog-api-client'); const { AuthorizationError } = require('./errors'); const { logDebug, logDeprecation } = require('./logging'); +const RETRYABLE_ERROR_CODES = new Set([ + 'ECONNREFUSED', + 'ECONNRESET', + 'ENOTFOUND', + 'EPIPE', + 'ETIMEDOUT' +]); + +async function sleep(milliseconds) { + await new Promise((r) => setTimeout(r, milliseconds)); +} + /** * A Reporter that throws away metrics instead of sending them to Datadog. This * is useful for disabling metrics in your application and for tests. @@ -13,6 +25,99 @@ class NullReporter { } } +/** + * @private + * A custom HTTP implementation for Datadog that retries failed requests. + * Datadog has retries built in, but they don't handle network errors (just + * HTTP errors), and we want to retry in both cases. 
This inherits from the + * built-in HTTP library since we want to use the same fetch implementation + * Datadog uses instead of adding another dependency. + */ +class RetryHttp extends datadogApiClient.client.IsomorphicFetchHttpLibrary { + constructor(options = {}) { + super(options); + + // HACK: ensure enableRetry is always `false` so the base class logic + // does not actually retry (since we manage retries here). + Object.defineProperty(this, 'enableRetry', { + get () { return false; }, + set () {}, + }); + } + + async send(request) { + let i = 0; + while (true) { // eslint-disable-line no-constant-condition + let response, error; + try { + response = await super.send(request); + } catch (e) { + error = e; + } + + if (this.isRetryable(response || error, i)) { + await sleep(this.retryDelay(response || error, i)); + } else if (response) { + return response; + } else { + throw error; + } + + i++; + } + } + + /** + * @private + * @param {any} response HTTP response or error object + * @returns {boolean} + */ + isRetryable(response, tryCount) { + return tryCount < this.maxRetries && ( + RETRYABLE_ERROR_CODES.has(response.code) + || response.httpStatusCode === 429 + || response.httpStatusCode >= 500 + ); + } + + /** + * @private + * @param {any} response HTTP response or error object + * @param {number} tryCount + * @returns {number} + */ + retryDelay(response, tryCount) { + if (response.httpStatusCode === 429) { + // Datadog's official client supports just the 'x-ratelimit-reset' + // header, so we support that here in addition to the standardized + // 'retry-after' header. + // There is also an upcoming IETF standard for 'ratelimit', but it + // has moved away from the syntax used in 'x-ratelimit-reset'. This + // stuff might change in the future. 
+ // https://datatracker.ietf.org/doc/draft-ietf-httpapi-ratelimit-headers/ + const delayHeader = response.headers['retry-after'] + || response.headers['x-ratelimit-reset']; + const delayValue = parseInt(delayHeader, 10); + if (!isNaN(delayValue) && delayValue > 0) { + return delayValue * 1000; + } + } + + return this.backoffMultiplier ** tryCount * this.backoffBase * 1000; + } +} + +/** + * @typedef {Object} DatadogReporterOptions + * @property {string} [apiKey] Datadog API key. + * @property {string} [appKey] DEPRECATED! This option does nothing. + * @property {string} [site] The Datadog "site" to send metrics to. + * @property {number} [retries] Retry failed requests up to this many times. + * @property {number} [retryBackoff] Delay before retries. Subsequent retries + * wait this long multiplied by 2^(retry count). + */ + +/** @type {WeakMap} */ const datadogClients = new WeakMap(); /** @@ -21,40 +126,48 @@ const datadogClients = new WeakMap(); class DatadogReporter { /** * Create a reporter that sends metrics to Datadog's API. - * @param {string} [apiKey] - * @param {string} [appKey] DEPRECATED! This argument does nothing. - * @param {string} [site] + * @param {DatadogReporterOptions} [options] */ - constructor(apiKey, appKey, site) { - if (appKey) { - if (!site && /(datadoghq|ddog-gov)\./.test(appKey)) { - site = appKey; - appKey = null; - } else { - logDeprecation( - 'The `appKey` option is no longer supported since it is ' + - 'not used for submitting metrics, distributions, events, ' + - 'or logs.' 
- ); - } + constructor(options = {}) { + if (typeof options !== 'object') { + throw new TypeError('DatadogReporter takes an options object, not multiple string arguments.'); } - apiKey = apiKey || process.env.DATADOG_API_KEY || process.env.DD_API_KEY; - this.site = site || process.env.DATADOG_SITE || process.env.DD_SITE || process.env.DATADOG_API_HOST; + if (options.appKey) { + logDeprecation( + 'The `appKey` option is no longer supported since it is ' + + 'not used for submitting metrics, distributions, events, ' + + 'or logs.' + ); + } + + const apiKey = options.apiKey || process.env.DATADOG_API_KEY || process.env.DD_API_KEY; + this.site = options.site + || process.env.DATADOG_SITE + || process.env.DD_SITE + || process.env.DATADOG_API_HOST; if (!apiKey) { throw new Error( - 'Datadog API key not found. You must specify one via a ' + - 'configuration option or the DATADOG_API_KEY (or DD_API_KEY) ' + - 'environment variable.' + 'Datadog API key not found. You must specify one via the ' + + '`apiKey` configuration option or the DATADOG_API_KEY or ' + + 'DD_API_KEY environment variable.' ); } const configuration = datadogApiClient.client.createConfiguration({ authMethods: { apiKeyAuth: apiKey, - } + }, + httpApi: new RetryHttp(), + maxRetries: options.retries >= 0 ? options.retries : 2, }); + + // HACK: Specify backoff here rather than in configuration options to + // support values less than 2 (mainly for faster tests). + const backoff = options.retryBackoff >= 0 ? options.retryBackoff : 1; + configuration.httpApi.backoffBase = backoff; + if (this.site) { // Strip leading `app.` from the site in case someone copy/pasted the // URL from their web browser. 
More details on correct configuration: @@ -64,6 +177,7 @@ class DatadogReporter { site: this.site }); } + datadogClients.set(this, new datadogApiClient.v1.MetricsApi(configuration)); } @@ -139,7 +253,7 @@ class DataDogReporter extends DatadogReporter { 'DataDogReporter has been renamed to DatadogReporter (lower-case ' + 'D in "dog"); the old name will be removed in a future release.' ); - super(apiKey, appKey, site); + super({ apiKey, appKey, site }); } } diff --git a/package.json b/package.json index e2db1ef..4c9ea8c 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,7 @@ "typescript": "^4.8.4" }, "dependencies": { - "@datadog/datadog-api-client": "^1.16.0", + "@datadog/datadog-api-client": "^1.17.0", "debug": "^4.1.0" }, "engines": { diff --git a/test/reporters_tests.js b/test/reporters_tests.js index 11dfbf9..53084e3 100644 --- a/test/reporters_tests.js +++ b/test/reporters_tests.js @@ -33,7 +33,10 @@ describe('DatadogReporter', function() { }); it('creates a DatadogReporter', () => { - const instance = new DatadogReporter('abc', '123', 'datadoghq.eu'); + const instance = new DatadogReporter({ + apiKey: 'abc', + site: 'datadoghq.eu' + }); instance.should.be.an.instanceof(DatadogReporter); }); @@ -58,7 +61,10 @@ describe('DatadogReporter', function() { let reporter; beforeEach(() => { - reporter = new DatadogReporter('abc'); + reporter = new DatadogReporter({ + apiKey: 'abc', + retryBackoff: 0.01 + }); }); it('should resolve on success', async function () { @@ -69,14 +75,109 @@ describe('DatadogReporter', function() { await reporter.report([mockMetric]).should.be.fulfilled; }); - it('should reject on error', async function () { + it('should reject on http error', async function () { nock('https://api.datadoghq.com') .post('/api/v1/series') + .times(3) .reply(500, { errors: ['Unknown!'] }); await reporter.report([mockMetric]).should.be.rejected; }); + it('should retry on http error', async function () { + nock('https://api.datadoghq.com') + 
.post('/api/v1/series') + .times(1) + .reply(500, { errors: ['Unknown!'] }) + .post('/api/v1/series') + .times(1) + .reply(202, { errors: [] }); + + await reporter.report([mockMetric]).should.be.fulfilled; + }); + + it('should respect the `Retry-After` header', async function () { + const callTimes = []; + + nock('https://api.datadoghq.com') + .post('/api/v1/series') + .times(1) + .reply(() => { + callTimes.push(Date.now()); + return [429, { errors: ['Uhoh'] }, { 'Retry-After': '1' }]; + }) + .post('/api/v1/series') + .times(1) + .reply(() => { + callTimes.push(Date.now()); + return [202, { errors: [] }]; + }); + + await reporter.report([mockMetric]).should.be.fulfilled; + + const timeDelta = callTimes[1] - callTimes[0]; + timeDelta.should.be.within(980, 1020); + }); + + it('should respect the `X-RateLimit-Reset` header', async function () { + const callTimes = []; + + nock('https://api.datadoghq.com') + .post('/api/v1/series') + .times(1) + .reply(() => { + callTimes.push(Date.now()); + return [429, { errors: ['Uhoh'] }, { 'X-RateLimit-Reset': '1' }]; + }) + .post('/api/v1/series') + .times(1) + .reply(() => { + callTimes.push(Date.now()); + return [202, { errors: [] }]; + }); + + await reporter.report([mockMetric]).should.be.fulfilled; + + const timeDelta = callTimes[1] - callTimes[0]; + timeDelta.should.be.within(980, 1020); + }); + + it('should reject on network error', async function () { + nock('https://api.datadoghq.com') + .post('/api/v1/series') + .times(3) + .replyWithError({ + message: 'connect ECONNREFUSED', + code: 'ECONNREFUSED' + }); + + await reporter.report([mockMetric]).should.be.rejected; + }); + + it('should retry on network error', async function () { + nock('https://api.datadoghq.com') + .post('/api/v1/series') + .times(1) + .replyWithError({ + message: 'connect ECONNREFUSED', + code: 'ECONNREFUSED' + }) + .post('/api/v1/series') + .times(1) + .reply(202, { errors: [] }); + + await reporter.report([mockMetric]).should.be.fulfilled; + }); + + 
it('should not retry on unknown errors', async function () { + nock('https://api.datadoghq.com') + .post('/api/v1/series') + .times(1) + .replyWithError({ message: 'Oh no!' }); + + await reporter.report([mockMetric]).should.be.rejectedWith('Oh no!'); + }); + it('rejects with AuthorizationError when the API key is invalid', async function() { nock('https://api.datadoghq.com') .post('/api/v1/series') @@ -99,7 +200,7 @@ describe('DatadogReporter', function() { .times(apiKeys.length) .reply(202, { errors: [] }); - const reporters = apiKeys.map(key => new DatadogReporter(key)); + const reporters = apiKeys.map(apiKey => new DatadogReporter({ apiKey })); await Promise.all(reporters.map(r => r.report([mockMetric]))); receivedKeys.should.deep.equal(apiKeys);