diff --git a/packages/host-metrics/README.md b/packages/host-metrics/README.md index 587c5e57a7..23bd24417e 100644 --- a/packages/host-metrics/README.md +++ b/packages/host-metrics/README.md @@ -40,6 +40,12 @@ const hostMetrics = new HostMetrics({ meterProvider }); hostMetrics.start(); ``` +## Configuration + +| Option | Type | Description | +| -------------- | ---------- | ----------- | +| `metricGroups` | `string[]` | Optionally specify zero or more groups of metrics to collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | + ## Semantic Conventions This package uses Semantic Conventions [Version 1.25.0](https://github.com/open-telemetry/semantic-conventions/tree/v1.25.0/docs/system). @@ -48,18 +54,25 @@ Ref: [opentelemetry-js/issues/4235](https://github.com/open-telemetry/openteleme Metrics collected: -| Metric | Short Description | -| --------------------------- | --------------------------------------------------------- | -| `system.cpu.time` | Seconds each logical CPU spent on each mode | -| `system.cpu.utilization` | CPU usage time (0-1) | -| `system.memory.usage` | Reports memory in use by state | -| `system.memory.utilization` | Memory usage (0-1) | -| `system.network.dropped` | Count of packets that are dropped | -| `system.network.errors` | Count of network errors detected | -| `system.network.io` | Network flow direction | -| `process.cpu.time` | Total CPU seconds | -| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement | -| `process.memory.usage` | The amount of physical memory in use | +| Metric | Short Description | +| ----------------------------- | --------------------------------------------------------- | +| **Group `system.cpu`** | | +| 
`system.cpu.time` | Seconds each logical CPU spent on each mode | +| `system.cpu.utilization` | CPU usage time (0-1) | +| **Group `system.memory`** | | +| `system.memory.usage` | Reports memory in use by state | +| `system.memory.utilization` | Memory usage (0-1) | +| **Group `system.network`** | | +| `system.network.dropped` | Count of packets that are dropped | +| `system.network.errors` | Count of network errors detected | +| `system.network.io` | Network flow direction | +| **Group `process.cpu`** | | +| `process.cpu.time` | Total CPU seconds | +| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement | +| **Group `process.memory`** | | +| `process.memory.usage` | The amount of physical memory in use | + +Note: the "Group" names are groupings used by the `metricGroups` configuration option. Attributes collected: diff --git a/packages/host-metrics/src/BaseMetrics.ts b/packages/host-metrics/src/BaseMetrics.ts index a320a6c552..27b141996d 100644 --- a/packages/host-metrics/src/BaseMetrics.ts +++ b/packages/host-metrics/src/BaseMetrics.ts @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -import { Meter, MeterProvider, diag, metrics } from '@opentelemetry/api'; +import { Meter, MeterProvider, metrics } from '@opentelemetry/api'; /** @knipignore */ import { PACKAGE_NAME, PACKAGE_VERSION } from './version'; @@ -27,6 +27,7 @@ export interface MetricsCollectorConfig { meterProvider?: MeterProvider; // Name of component name?: string; + metricGroups?: string[]; } const DEFAULT_NAME = PACKAGE_NAME; @@ -35,9 +36,9 @@ const DEFAULT_NAME = PACKAGE_NAME; * Base Class for metrics */ export abstract class BaseMetrics { - protected _logger = diag; protected _meter: Meter; private _name: string; + protected _metricGroups: Array<string> | undefined; constructor(config?: MetricsCollectorConfig) { // Do not use `??` operator to allow falling back to default when the @@ -45,6 +46,7 @@ export abstract class BaseMetrics { this._name = config?.name || DEFAULT_NAME; const meterProvider = config?.meterProvider ?? metrics.getMeterProvider(); this._meter = meterProvider.getMeter(this._name, PACKAGE_VERSION); + this._metricGroups = config?.metricGroups; } /** diff --git a/packages/host-metrics/src/metric.ts b/packages/host-metrics/src/metric.ts index 5ccef55b91..0a37ab09dc 100644 --- a/packages/host-metrics/src/metric.ts +++ b/packages/host-metrics/src/metric.ts @@ -205,102 +205,130 @@ export class HostMetrics extends BaseMetrics { * Creates metrics */ protected _createMetrics(): void { - this._cpuTime = this._meter.createObservableCounter( - METRIC_SYSTEM_CPU_TIME, - { - description: 'Cpu time in seconds', - unit: 's', - } - ); - this._cpuUtilization = this._meter.createObservableGauge( - METRIC_SYSTEM_CPU_UTILIZATION, - { - description: 'Cpu usage time 0-1', - } - ); + const observables = []; - this._memoryUsage = this._meter.createObservableGauge( - METRIC_SYSTEM_MEMORY_USAGE, - { - description: 'Memory usage in bytes', - } - ); - this._memoryUtilization = this._meter.createObservableGauge( - METRIC_SYSTEM_MEMORY_UTILIZATION, - { - description: 'Memory usage 0-1', - } - ); + 
const systemCpuGroupEnabled = + !this._metricGroups || this._metricGroups.includes('system.cpu'); + const systemMemoryGroupEnabled = + !this._metricGroups || this._metricGroups.includes('system.memory'); + const systemNetworkGroupEnabled = + !this._metricGroups || this._metricGroups.includes('system.network'); + const processCpuGroupEnabled = + !this._metricGroups || this._metricGroups.includes('process.cpu'); + const processMemoryGroupEnabled = + !this._metricGroups || this._metricGroups.includes('process.memory'); - this._networkDropped = this._meter.createObservableCounter( - // There is no semconv pkg export for this in v1.37.0 because - // https://github.com/open-telemetry/semantic-conventions/issues/2828. - // TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change) - 'system.network.dropped', - { - description: 'Network dropped packets', - } - ); - this._networkErrors = this._meter.createObservableCounter( - METRIC_SYSTEM_NETWORK_ERRORS, - { - description: 'Network errors counter', - } - ); - this._networkIo = this._meter.createObservableCounter( - METRIC_SYSTEM_NETWORK_IO, - { - description: 'Network transmit and received bytes', - } - ); + if (systemCpuGroupEnabled) { + this._cpuTime = this._meter.createObservableCounter( + METRIC_SYSTEM_CPU_TIME, + { + description: 'Cpu time in seconds', + unit: 's', + } + ); + observables.push(this._cpuTime); + this._cpuUtilization = this._meter.createObservableGauge( + METRIC_SYSTEM_CPU_UTILIZATION, + { + description: 'Cpu usage time 0-1', + } + ); + observables.push(this._cpuUtilization); + } - this._processCpuTime = this._meter.createObservableCounter( - METRIC_PROCESS_CPU_TIME, - { - description: 'Process Cpu time in seconds', - unit: 's', - } - ); - this._processCpuUtilization = this._meter.createObservableGauge( - METRIC_PROCESS_CPU_UTILIZATION, - { - description: 'Process Cpu usage time 0-1', - } - ); - this._processMemoryUsage = this._meter.createObservableGauge( - METRIC_PROCESS_MEMORY_USAGE, - { 
- description: 'Process Memory usage in bytes', - } - ); + if (systemMemoryGroupEnabled) { + this._memoryUsage = this._meter.createObservableGauge( + METRIC_SYSTEM_MEMORY_USAGE, + { + description: 'Memory usage in bytes', + } + ); + observables.push(this._memoryUsage); + this._memoryUtilization = this._meter.createObservableGauge( + METRIC_SYSTEM_MEMORY_UTILIZATION, + { + description: 'Memory usage 0-1', + } + ); + observables.push(this._memoryUtilization); + } - this._meter.addBatchObservableCallback( - async observableResult => { - const cpuUsages = getCpuUsageData(); - const memoryUsages = getMemoryData(); - const processCpuUsages = getProcessCpuUsageData(); - const processMemoryUsages = getProcessMemoryData(); - const networkData = await getNetworkData(); + if (systemNetworkGroupEnabled) { + this._networkDropped = this._meter.createObservableCounter( + // There is no semconv pkg export for this in v1.37.0 because + // https://github.com/open-telemetry/semantic-conventions/issues/2828. 
+ // TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change) + 'system.network.dropped', + { + description: 'Network dropped packets', + } + ); + observables.push(this._networkDropped); + this._networkErrors = this._meter.createObservableCounter( + METRIC_SYSTEM_NETWORK_ERRORS, + { + description: 'Network errors counter', + } + ); + observables.push(this._networkErrors); + this._networkIo = this._meter.createObservableCounter( + METRIC_SYSTEM_NETWORK_IO, + { + description: 'Network transmit and received bytes', + } + ); + observables.push(this._networkIo); + } + + if (processCpuGroupEnabled) { + this._processCpuTime = this._meter.createObservableCounter( + METRIC_PROCESS_CPU_TIME, + { + description: 'Process Cpu time in seconds', + unit: 's', + } + ); + observables.push(this._processCpuTime); + this._processCpuUtilization = this._meter.createObservableGauge( + METRIC_PROCESS_CPU_UTILIZATION, + { + description: 'Process Cpu usage time 0-1', + } + ); + observables.push(this._processCpuUtilization); + } + if (processMemoryGroupEnabled) { + this._processMemoryUsage = this._meter.createObservableGauge( + METRIC_PROCESS_MEMORY_USAGE, + { + description: 'Process Memory usage in bytes', + } + ); + observables.push(this._processMemoryUsage); + } + this._meter.addBatchObservableCallback(async observableResult => { + if (systemCpuGroupEnabled) { + const cpuUsages = getCpuUsageData(); this._batchUpdateCpuUsages(observableResult, cpuUsages); + } + if (systemMemoryGroupEnabled) { + const memoryUsages = getMemoryData(); this._batchUpdateMemUsages(observableResult, memoryUsages); + } + if (processCpuGroupEnabled) { + const processCpuUsages = getProcessCpuUsageData(); this._batchUpdateProcessCpuUsages(observableResult, processCpuUsages); + } + if (processMemoryGroupEnabled) { + const processMemoryUsages = getProcessMemoryData(); this._batchUpdateProcessMemUsage(observableResult, processMemoryUsages); + } + if (systemNetworkGroupEnabled) { + const networkData = 
await getNetworkData(); this._batchUpdateNetworkData(observableResult, networkData); - }, - [ - this._cpuTime, - this._cpuUtilization, - this._memoryUsage, - this._memoryUtilization, - this._processCpuTime, - this._processCpuUtilization, - this._processMemoryUsage, - this._networkDropped, - this._networkErrors, - this._networkIo, - ] - ); + } + }, observables); } /** diff --git a/packages/host-metrics/test/metric.test.ts b/packages/host-metrics/test/metric.test.ts index d6e9ba1654..ed4d6fe477 100644 --- a/packages/host-metrics/test/metric.test.ts +++ b/packages/host-metrics/test/metric.test.ts @@ -36,7 +36,7 @@ import { ATTR_SYSTEM_DEVICE, ATTR_SYSTEM_MEMORY_STATE, } from '../src/semconv'; -import { HostMetrics } from '../src'; +import { HostMetrics, MetricsCollectorConfig } from '../src'; const cpuJson = require('./mocks/cpu.json'); const processJson = require('./mocks/process.json'); @@ -450,12 +450,111 @@ describe('Host Metrics', () => { ensureValue(metric, {}, 123456); }); }); + + describe('metricGroups config option', () => { + let sandbox: sinon.SinonSandbox; + let hostMetrics: HostMetrics; + let reader: TestMetricReader; + + // This "setup" does the same as `beforeEach` above, with the addition of + // `hostMetricsConfig`. 
const setup = async ( + hostMetricsConfig: Partial<MetricsCollectorConfig> + ) => { + sandbox = sinon.createSandbox(); + sandbox.useFakeTimers(); + + sandbox.stub(os, 'freemem').callsFake(mockedOS.freemem); + sandbox.stub(os, 'totalmem').callsFake(mockedOS.totalmem); + sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus()); + sandbox.stub(process, 'uptime').callsFake(mockedProcess.uptime); + sandbox + .stub(process, 'cpuUsage') + .callsFake(() => mockedProcess.cpuUsage()); + sandbox + .stub(process.memoryUsage, 'rss') + .callsFake(mockedProcess.memoryUsage.rss); + sandbox.stub(Network, 'networkStats').callsFake(mockedSI.networkStats); + + reader = new TestMetricReader(); + + meterProvider = new MeterProvider({ + readers: [reader], + }); + + hostMetrics = new HostMetrics( + Object.assign( + { + meterProvider, + name: '', // to get default instrumentation scope name + }, + hostMetricsConfig + ) + ); + + await hostMetrics.start(); + + // Drop first frame cpu metrics, see + // src/common.ts getCpuUsageData/getProcessCpuUsageData + await reader.collect(); + + // advance the clock for the next collection + sandbox.clock.tick(1000); + + // invalidates throttles + countSI = 0; + }; + const teardown = () => { + sandbox.restore(); + }; + + const testCaseData = [ + { + metricGroups: ['system.cpu'], + expectedMetricNames: ['system.cpu.time', 'system.cpu.utilization'], + }, + { + metricGroups: ['system.memory'], + expectedMetricNames: [ + 'system.memory.usage', + 'system.memory.utilization', + ], + }, + { + metricGroups: ['system.network'], + expectedMetricNames: [ + 'system.network.dropped', + 'system.network.errors', + 'system.network.io', + ], + }, + { + metricGroups: ['process.cpu'], + expectedMetricNames: ['process.cpu.time', 'process.cpu.utilization'], + }, + { + metricGroups: ['process.memory'], + expectedMetricNames: ['process.memory.usage'], + }, + ]; + + for (const testCaseDatum of testCaseData) { + it(`metricGroups: ${JSON.stringify( + testCaseDatum.metricGroups + )}`, async () => { + 
await setup({ metricGroups: testCaseDatum.metricGroups }); + const metricData = await getMetricData(reader); + const metricNames = metricData.map(md => md.descriptor.name); + assert.deepStrictEqual(metricNames, testCaseDatum.expectedMetricNames); + teardown(); + }); + } + }); }); -async function getRecords( - metricReader: MetricReader, - name: string -): Promise { +async function getMetricData( + metricReader: MetricReader +): Promise { const collectionResult = await metricReader.collect(); assert(collectionResult != null); assert.strictEqual(collectionResult.resourceMetrics.scopeMetrics.length, 1); @@ -465,7 +564,15 @@ async function getRecords( '@opentelemetry/host-metrics', 'default instrumentation scope name is the package name' ); - const metricDataList = scopeMetrics.metrics.filter( + return scopeMetrics.metrics; +} + +async function getRecords( + metricReader: MetricReader, + name: string +): Promise { + const metricData = await getMetricData(metricReader); + const metricDataList = metricData.filter( metric => metric.descriptor.name === name ); assert.strictEqual(metricDataList.length, 1);