From 29e0090e7ee4a9aef2e0047386821c6e547891cb Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Mon, 6 Oct 2025 14:02:05 -0700 Subject: [PATCH 1/6] drop unused _logger property --- packages/host-metrics/src/BaseMetrics.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/host-metrics/src/BaseMetrics.ts b/packages/host-metrics/src/BaseMetrics.ts index a320a6c552..cb7ab325ae 100644 --- a/packages/host-metrics/src/BaseMetrics.ts +++ b/packages/host-metrics/src/BaseMetrics.ts @@ -14,7 +14,7 @@ * limitations under the License. */ -import { Meter, MeterProvider, diag, metrics } from '@opentelemetry/api'; +import { Meter, MeterProvider, metrics } from '@opentelemetry/api'; /** @knipignore */ import { PACKAGE_NAME, PACKAGE_VERSION } from './version'; @@ -35,7 +35,6 @@ const DEFAULT_NAME = PACKAGE_NAME; * Base Class for metrics */ export abstract class BaseMetrics { - protected _logger = diag; protected _meter: Meter; private _name: string; From 275c22548f6b95c4e1d98cf228a9e1acc21462fa Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Mon, 6 Oct 2025 14:28:03 -0700 Subject: [PATCH 2/6] feat(host-metrics): add 'metricsGroups' configuration option to limit which metrics are collected Collection of some host-metrics metrics can be costly. A Metrics View can be used to drop metrics, but there will still be the cost of having collected them in the first place. This adds a config option to select which groups of metrics should be collected. 
--- packages/host-metrics/README.md | 35 ++-- packages/host-metrics/src/BaseMetrics.ts | 3 + packages/host-metrics/src/metric.ts | 200 +++++++++++++---------- 3 files changed, 140 insertions(+), 98 deletions(-) diff --git a/packages/host-metrics/README.md b/packages/host-metrics/README.md index 587c5e57a7..66584bc986 100644 --- a/packages/host-metrics/README.md +++ b/packages/host-metrics/README.md @@ -40,6 +40,12 @@ const hostMetrics = new HostMetrics({ meterProvider }); hostMetrics.start(); ``` +## Configuration + +| Option | Type | Description | +| ----------------| --------------- | ----------- | +| `metricsGroups` | `Array<string>` | Optionally specify zero or more groups of metrics to collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricsGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | + ## Semantic Conventions This package uses Semantic Conventions [Version 1.25.0](https://github.com/open-telemetry/semantic-conventions/tree/v1.25.0/docs/system). 
@@ -48,18 +54,23 @@ Ref: [opentelemetry-js/issues/4235](https://github.com/open-telemetry/openteleme Metrics collected: -| Metric | Short Description | -| --------------------------- | --------------------------------------------------------- | -| `system.cpu.time` | Seconds each logical CPU spent on each mode | -| `system.cpu.utilization` | CPU usage time (0-1) | -| `system.memory.usage` | Reports memory in use by state | -| `system.memory.utilization` | Memory usage (0-1) | -| `system.network.dropped` | Count of packets that are dropped | -| `system.network.errors` | Count of network errors detected | -| `system.network.io` | Network flow direction | -| `process.cpu.time` | Total CPU seconds | -| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement | -| `process.memory.usage` | The amount of physical memory in use | +| Metric | Short Description | +| ----------------------------- | --------------------------------------------------------- | +| **Group `system.cpu`** | | +| `system.cpu.time` | Seconds each logical CPU spent on each mode | +| `system.cpu.utilization` | CPU usage time (0-1) | +| **Group `system.memory`** | | +| `system.memory.usage` | Reports memory in use by state | +| `system.memory.utilization` | Memory usage (0-1) | +| **Group `system.network`** | | +| `system.network.dropped` | Count of packets that are dropped | +| `system.network.errors` | Count of network errors detected | +| `system.network.io` | Network flow direction | +| **Group `process.cpu`** | | +| `process.cpu.time` | Total CPU seconds | +| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement | +| **Group `process.memory`** | | +| `process.memory.usage` | The amount of physical memory in use | Attributes collected: diff --git a/packages/host-metrics/src/BaseMetrics.ts b/packages/host-metrics/src/BaseMetrics.ts index cb7ab325ae..51dfcf3ac8 100644 --- a/packages/host-metrics/src/BaseMetrics.ts +++ 
b/packages/host-metrics/src/BaseMetrics.ts @@ -27,6 +27,7 @@ export interface MetricsCollectorConfig { meterProvider?: MeterProvider; // Name of component name?: string; + metricsGroups?: Array<string>; } const DEFAULT_NAME = PACKAGE_NAME; @@ -37,6 +38,7 @@ const DEFAULT_NAME = PACKAGE_NAME; export abstract class BaseMetrics { protected _meter: Meter; private _name: string; + protected _metricsGroups: Array<string> | undefined; constructor(config?: MetricsCollectorConfig) { // Do not use `??` operator to allow falling back to default when the @@ -44,6 +46,7 @@ export abstract class BaseMetrics { this._name = config?.name || DEFAULT_NAME; const meterProvider = config?.meterProvider ?? metrics.getMeterProvider(); this._meter = meterProvider.getMeter(this._name, PACKAGE_VERSION); + this._metricsGroups = config?.metricsGroups; } /** diff --git a/packages/host-metrics/src/metric.ts b/packages/host-metrics/src/metric.ts index 5ccef55b91..1554c5cc4c 100644 --- a/packages/host-metrics/src/metric.ts +++ b/packages/host-metrics/src/metric.ts @@ -205,102 +205,130 @@ export class HostMetrics extends BaseMetrics { * Creates metrics */ protected _createMetrics(): void { - this._cpuTime = this._meter.createObservableCounter( - METRIC_SYSTEM_CPU_TIME, - { - description: 'Cpu time in seconds', - unit: 's', - } - ); - this._cpuUtilization = this._meter.createObservableGauge( - METRIC_SYSTEM_CPU_UTILIZATION, - { - description: 'Cpu usage time 0-1', - } - ); + const observables = []; - this._memoryUsage = this._meter.createObservableGauge( - METRIC_SYSTEM_MEMORY_USAGE, - { - description: 'Memory usage in bytes', - } - ); - this._memoryUtilization = this._meter.createObservableGauge( - METRIC_SYSTEM_MEMORY_UTILIZATION, - { - description: 'Memory usage 0-1', - } - ); + const systemCpuGroupEnabled = + !this._metricsGroups || this._metricsGroups.includes('system.cpu'); + const systemMemoryGroupEnabled = + !this._metricsGroups || this._metricsGroups.includes('system.memory'); + const 
systemNetworkGroupEnabled = + !this._metricsGroups || this._metricsGroups.includes('system.network'); + const processCpuGroupEnabled = + !this._metricsGroups || this._metricsGroups.includes('process.cpu'); + const processMemoryGroupEnabled = + !this._metricsGroups || this._metricsGroups.includes('process.memory'); - this._networkDropped = this._meter.createObservableCounter( - // There is no semconv pkg export for this in v1.37.0 because - // https://github.com/open-telemetry/semantic-conventions/issues/2828. - // TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change) - 'system.network.dropped', - { - description: 'Network dropped packets', - } - ); - this._networkErrors = this._meter.createObservableCounter( - METRIC_SYSTEM_NETWORK_ERRORS, - { - description: 'Network errors counter', - } - ); - this._networkIo = this._meter.createObservableCounter( - METRIC_SYSTEM_NETWORK_IO, - { - description: 'Network transmit and received bytes', - } - ); + if (systemCpuGroupEnabled) { + this._cpuTime = this._meter.createObservableCounter( + METRIC_SYSTEM_CPU_TIME, + { + description: 'Cpu time in seconds', + unit: 's', + } + ); + observables.push(this._cpuTime); + this._cpuUtilization = this._meter.createObservableGauge( + METRIC_SYSTEM_CPU_UTILIZATION, + { + description: 'Cpu usage time 0-1', + } + ); + observables.push(this._cpuUtilization); + } - this._processCpuTime = this._meter.createObservableCounter( - METRIC_PROCESS_CPU_TIME, - { - description: 'Process Cpu time in seconds', - unit: 's', - } - ); - this._processCpuUtilization = this._meter.createObservableGauge( - METRIC_PROCESS_CPU_UTILIZATION, - { - description: 'Process Cpu usage time 0-1', - } - ); - this._processMemoryUsage = this._meter.createObservableGauge( - METRIC_PROCESS_MEMORY_USAGE, - { - description: 'Process Memory usage in bytes', - } - ); + if (systemMemoryGroupEnabled) { + this._memoryUsage = this._meter.createObservableGauge( + METRIC_SYSTEM_MEMORY_USAGE, + { + description: 'Memory 
usage in bytes', + } + ); + observables.push(this._memoryUsage); + this._memoryUtilization = this._meter.createObservableGauge( + METRIC_SYSTEM_MEMORY_UTILIZATION, + { + description: 'Memory usage 0-1', + } + ); + observables.push(this._memoryUtilization); + } - this._meter.addBatchObservableCallback( - async observableResult => { - const cpuUsages = getCpuUsageData(); - const memoryUsages = getMemoryData(); - const processCpuUsages = getProcessCpuUsageData(); - const processMemoryUsages = getProcessMemoryData(); - const networkData = await getNetworkData(); + if (systemNetworkGroupEnabled) { + this._networkDropped = this._meter.createObservableCounter( + // There is no semconv pkg export for this in v1.37.0 because + // https://github.com/open-telemetry/semantic-conventions/issues/2828. + // TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change) + 'system.network.dropped', + { + description: 'Network dropped packets', + } + ); + observables.push(this._networkDropped); + this._networkErrors = this._meter.createObservableCounter( + METRIC_SYSTEM_NETWORK_ERRORS, + { + description: 'Network errors counter', + } + ); + observables.push(this._networkErrors); + this._networkIo = this._meter.createObservableCounter( + METRIC_SYSTEM_NETWORK_IO, + { + description: 'Network transmit and received bytes', + } + ); + observables.push(this._networkIo); + } + + if (processCpuGroupEnabled) { + this._processCpuTime = this._meter.createObservableCounter( + METRIC_PROCESS_CPU_TIME, + { + description: 'Process Cpu time in seconds', + unit: 's', + } + ); + observables.push(this._processCpuTime); + this._processCpuUtilization = this._meter.createObservableGauge( + METRIC_PROCESS_CPU_UTILIZATION, + { + description: 'Process Cpu usage time 0-1', + } + ); + observables.push(this._processCpuUtilization); + } + if (processMemoryGroupEnabled) { + this._processMemoryUsage = this._meter.createObservableGauge( + METRIC_PROCESS_MEMORY_USAGE, + { + description: 'Process Memory 
usage in bytes', + } + ); + observables.push(this._processMemoryUsage); + } + this._meter.addBatchObservableCallback(async observableResult => { + if (systemCpuGroupEnabled) { + const cpuUsages = getCpuUsageData(); this._batchUpdateCpuUsages(observableResult, cpuUsages); + } + if (systemMemoryGroupEnabled) { + const memoryUsages = getMemoryData(); this._batchUpdateMemUsages(observableResult, memoryUsages); + } + if (processCpuGroupEnabled) { + const processCpuUsages = getProcessCpuUsageData(); this._batchUpdateProcessCpuUsages(observableResult, processCpuUsages); + } + if (processMemoryGroupEnabled) { + const processMemoryUsages = getProcessMemoryData(); this._batchUpdateProcessMemUsage(observableResult, processMemoryUsages); + } + if (systemNetworkGroupEnabled) { + const networkData = await getNetworkData(); this._batchUpdateNetworkData(observableResult, networkData); - }, - [ - this._cpuTime, - this._cpuUtilization, - this._memoryUsage, - this._memoryUtilization, - this._processCpuTime, - this._processCpuUtilization, - this._processMemoryUsage, - this._networkDropped, - this._networkErrors, - this._networkIo, - ] - ); + } + }, observables); } /** From aabb9b75f2022e62b10a205a5616c0c1490930ae Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Mon, 6 Oct 2025 14:37:15 -0700 Subject: [PATCH 3/6] s/metricsGroups/metricGroups/ because the double-plural is weird --- packages/host-metrics/README.md | 8 +++++--- packages/host-metrics/src/BaseMetrics.ts | 6 +++--- packages/host-metrics/src/metric.ts | 10 +++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/packages/host-metrics/README.md b/packages/host-metrics/README.md index 66584bc986..05f1a95a4e 100644 --- a/packages/host-metrics/README.md +++ b/packages/host-metrics/README.md @@ -42,9 +42,9 @@ hostMetrics.start(); ## Configuration -| Option | Type | Description | -| ----------------| --------------- | ----------- | -| `metricsGroups` | `Array<string>` | Optionally specify zero or more groups of metrics to 
collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricsGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | +| Option | Type | Description | +| -------------- | --------------- | ----------- | +| `metricGroups` | `Array<string>` | Optionally specify zero or more groups of metrics to collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | ## Semantic Conventions @@ -72,6 +72,8 @@ Metrics collected: | **Group `process.memory`** | | | `process.memory.usage` | The amount of physical memory in use | +Note: the "Group" names are groupings used by the `metricGroups` configuration option. 
+ Attributes collected: | Metric | Short Description | diff --git a/packages/host-metrics/src/BaseMetrics.ts b/packages/host-metrics/src/BaseMetrics.ts index 51dfcf3ac8..ea7d47518b 100644 --- a/packages/host-metrics/src/BaseMetrics.ts +++ b/packages/host-metrics/src/BaseMetrics.ts @@ -27,7 +27,7 @@ export interface MetricsCollectorConfig { meterProvider?: MeterProvider; // Name of component name?: string; - metricsGroups?: Array<string>; + metricGroups?: Array<string>; } const DEFAULT_NAME = PACKAGE_NAME; @@ -38,7 +38,7 @@ const DEFAULT_NAME = PACKAGE_NAME; export abstract class BaseMetrics { protected _meter: Meter; private _name: string; - protected _metricsGroups: Array<string> | undefined; + protected _metricGroups: Array<string> | undefined; constructor(config?: MetricsCollectorConfig) { // Do not use `??` operator to allow falling back to default when the @@ -46,7 +46,7 @@ export abstract class BaseMetrics { this._name = config?.name || DEFAULT_NAME; const meterProvider = config?.meterProvider ?? metrics.getMeterProvider(); this._meter = meterProvider.getMeter(this._name, PACKAGE_VERSION); - this._metricsGroups = config?.metricsGroups; + this._metricGroups = config?.metricGroups; } /** diff --git a/packages/host-metrics/src/metric.ts b/packages/host-metrics/src/metric.ts index 1554c5cc4c..0a37ab09dc 100644 --- a/packages/host-metrics/src/metric.ts +++ b/packages/host-metrics/src/metric.ts @@ -208,15 +208,15 @@ export class HostMetrics extends BaseMetrics { const observables = []; const systemCpuGroupEnabled = - !this._metricsGroups || this._metricsGroups.includes('system.cpu'); + !this._metricGroups || this._metricGroups.includes('system.cpu'); const systemMemoryGroupEnabled = - !this._metricsGroups || this._metricsGroups.includes('system.memory'); + !this._metricGroups || this._metricGroups.includes('system.memory'); const systemNetworkGroupEnabled = - !this._metricsGroups || this._metricsGroups.includes('system.network'); + !this._metricGroups || 
this._metricGroups.includes('system.network'); const processCpuGroupEnabled = - !this._metricsGroups || this._metricsGroups.includes('process.cpu'); + !this._metricGroups || this._metricGroups.includes('process.cpu'); const processMemoryGroupEnabled = - !this._metricsGroups || this._metricsGroups.includes('process.memory'); + !this._metricGroups || this._metricGroups.includes('process.memory'); if (systemCpuGroupEnabled) { this._cpuTime = this._meter.createObservableCounter( From 68f33618e0b55b5b8effc8ad603ede251434b125 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Tue, 7 Oct 2025 07:47:30 -0700 Subject: [PATCH 4/6] better type suggestion Co-authored-by: Chengzhong Wu --- packages/host-metrics/src/BaseMetrics.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/host-metrics/src/BaseMetrics.ts b/packages/host-metrics/src/BaseMetrics.ts index ea7d47518b..27b141996d 100644 --- a/packages/host-metrics/src/BaseMetrics.ts +++ b/packages/host-metrics/src/BaseMetrics.ts @@ -27,7 +27,7 @@ export interface MetricsCollectorConfig { meterProvider?: MeterProvider; // Name of component name?: string; - metricGroups?: Array<string>; + metricGroups?: string[]; } const DEFAULT_NAME = PACKAGE_NAME; From 3c14890e5fe0d43f9a8eb1c6dd8a978be697a273 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Wed, 15 Oct 2025 16:07:56 -0700 Subject: [PATCH 5/6] add tests for 'metricGroups' config option --- packages/host-metrics/test/metric.test.ts | 119 ++++++++++++++++++++-- 1 file changed, 113 insertions(+), 6 deletions(-) diff --git a/packages/host-metrics/test/metric.test.ts b/packages/host-metrics/test/metric.test.ts index d6e9ba1654..ed4d6fe477 100644 --- a/packages/host-metrics/test/metric.test.ts +++ b/packages/host-metrics/test/metric.test.ts @@ -36,7 +36,7 @@ import { ATTR_SYSTEM_DEVICE, ATTR_SYSTEM_MEMORY_STATE, } from '../src/semconv'; -import { HostMetrics } from '../src'; +import { HostMetrics, MetricsCollectorConfig } from '../src'; const cpuJson = 
require('./mocks/cpu.json'); const processJson = require('./mocks/process.json'); @@ -450,12 +450,111 @@ describe('Host Metrics', () => { ensureValue(metric, {}, 123456); }); }); + + describe('metricGroups config option', () => { + let sandbox: sinon.SinonSandbox; + let hostMetrics: HostMetrics; + let reader: TestMetricReader; + + // This "setup" does the same as `beforeEach` above, with the addition of + // `hostMetricsConfig`. + const setup = async ( + hostMetricsConfig: Partial<MetricsCollectorConfig> + ) => { + sandbox = sinon.createSandbox(); + sandbox.useFakeTimers(); + + sandbox.stub(os, 'freemem').callsFake(mockedOS.freemem); + sandbox.stub(os, 'totalmem').callsFake(mockedOS.totalmem); + sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus()); + sandbox.stub(process, 'uptime').callsFake(mockedProcess.uptime); + sandbox + .stub(process, 'cpuUsage') + .callsFake(() => mockedProcess.cpuUsage()); + sandbox + .stub(process.memoryUsage, 'rss') + .callsFake(mockedProcess.memoryUsage.rss); + sandbox.stub(Network, 'networkStats').callsFake(mockedSI.networkStats); + + reader = new TestMetricReader(); + + meterProvider = new MeterProvider({ + readers: [reader], + }); + + hostMetrics = new HostMetrics( + Object.assign( + { + meterProvider, + name: '', // to get default instrumentation scope name + }, + hostMetricsConfig + ) + ); + + await hostMetrics.start(); + + // Drop first frame cpu metrics, see + // src/common.ts getCpuUsageData/getProcessCpuUsageData + await reader.collect(); + + // advance the clock for the next collection + sandbox.clock.tick(1000); + + // invalidates throttles + countSI = 0; + }; + const teardown = () => { + sandbox.restore(); + }; + + const testCaseData = [ + { + metricGroups: ['system.cpu'], + expectedMetricNames: ['system.cpu.time', 'system.cpu.utilization'], + }, + { + metricGroups: ['system.memory'], + expectedMetricNames: [ + 'system.memory.usage', + 'system.memory.utilization', + ], + }, + { + metricGroups: ['system.network'], + expectedMetricNames: [ + 
'system.network.dropped', + 'system.network.errors', + 'system.network.io', + ], + }, + { + metricGroups: ['process.cpu'], + expectedMetricNames: ['process.cpu.time', 'process.cpu.utilization'], + }, + { + metricGroups: ['process.memory'], + expectedMetricNames: ['process.memory.usage'], + }, + ]; + + for (const testCaseDatum of testCaseData) { + it(`metricGroups: ${JSON.stringify( + testCaseDatum.metricGroups + )}`, async () => { + await setup({ metricGroups: testCaseDatum.metricGroups }); + const metricData = await getMetricData(reader); + const metricNames = metricData.map(md => md.descriptor.name); + assert.deepStrictEqual(metricNames, testCaseDatum.expectedMetricNames); + teardown(); + }); + } + }); }); -async function getRecords( - metricReader: MetricReader, - name: string -): Promise { +async function getMetricData( + metricReader: MetricReader +): Promise { const collectionResult = await metricReader.collect(); assert(collectionResult != null); assert.strictEqual(collectionResult.resourceMetrics.scopeMetrics.length, 1); @@ -465,7 +564,15 @@ async function getRecords( '@opentelemetry/host-metrics', 'default instrumentation scope name is the package name' ); - const metricDataList = scopeMetrics.metrics.filter( + return scopeMetrics.metrics; +} + +async function getRecords( + metricReader: MetricReader, + name: string +): Promise { + const metricData = await getMetricData(metricReader); + const metricDataList = metricData.filter( metric => metric.descriptor.name === name ); assert.strictEqual(metricDataList.length, 1); From 99ce7dfa005c6278fe5c0c9a293a1ac02f3fc3f0 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Wed, 15 Oct 2025 17:17:49 -0700 Subject: [PATCH 6/6] use the same array type suggestion here as well --- packages/host-metrics/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/host-metrics/README.md b/packages/host-metrics/README.md index 05f1a95a4e..23bd24417e 100644 --- a/packages/host-metrics/README.md +++ 
b/packages/host-metrics/README.md @@ -42,9 +42,9 @@ hostMetrics.start(); ## Configuration -| Option | Type | Description | -| -------------- | --------------- | ----------- | -| `metricGroups` | `Array<string>` | Optionally specify zero or more groups of metrics to collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | +| Option | Type | Description | +| -------------- | ---------- | ----------- | +| `metricGroups` | `string[]` | Optionally specify zero or more groups of metrics to collect. This package can collect many metrics. They are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is specified, only metrics from the named groups will be collected. For example, `metricGroups: ['process.cpu', 'process.memory']` will limit collection to just those 3 metrics. | ## Semantic Conventions