Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file.
([#823](https://github.com/open-telemetry/weaver/pull/823) by @lmolkova)
- Don't serialize default values and empty arrays when resolving semantic conventions.
([#822](https://github.com/open-telemetry/weaver/pull/822) by @lmolkova)
- Support for describing aggregation being done on metrics
([#845](https://github.com/open-telemetry/weaver/pull/845) by @thompson-tomo)

# [0.16.1] - 2025-07-04

Expand Down
9 changes: 9 additions & 0 deletions crates/weaver_forge/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use weaver_resolved_schema::attribute::Attribute;
use weaver_resolved_schema::catalog::Catalog;
use weaver_resolved_schema::lineage::GroupLineage;
use weaver_resolved_schema::registry::{Group, Registry};
use weaver_semconv::aggregation::AggregationSpec;
use weaver_semconv::any_value::AnyValueSpec;
use weaver_semconv::deprecated::Deprecated;
use weaver_semconv::group::{GroupType, InstrumentSpec, SpanKindSpec};
Expand Down Expand Up @@ -99,6 +100,12 @@ pub struct ResolvedGroup {
/// Note: This field is required if type is metric.
#[serde(skip_serializing_if = "Option::is_none")]
pub unit: Option<String>,
/// The aggregation which should occur on the data points being captured by a meter.
/// Semconv metrics all use the default aggregation type, hence this option is for
/// providing the parameters of the aggregation.
/// For more details: [Metrics SDK - Aggregation](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#aggregation).
#[serde(skip_serializing_if = "Option::is_none")]
pub aggregation: Option<AggregationSpec>,
/// The name of the event. If not specified, the prefix is used.
/// If prefix is empty (or unspecified), name is required.
#[serde(skip_serializing_if = "Option::is_none")]
Expand Down Expand Up @@ -168,6 +175,7 @@ impl ResolvedGroup {
metric_name: group.metric_name.clone(),
instrument: group.instrument.clone(),
unit: group.unit.clone(),
aggregation: group.aggregation.clone(),
name: group.name.clone(),
lineage,
display_name: group.display_name.clone(),
Expand Down Expand Up @@ -229,6 +237,7 @@ impl ResolvedRegistry {
metric_name: group.metric_name.clone(),
instrument: group.instrument.clone(),
unit: group.unit.clone(),
aggregation: group.aggregation.clone(),
name: group.name.clone(),
lineage,
display_name: group.display_name.clone(),
Expand Down
7 changes: 7 additions & 0 deletions crates/weaver_resolved_schema/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::registry::GroupStats::{
AttributeGroup, Entity, Event, Metric, MetricGroup, Scope, Span, Undefined,
};
use serde::{Deserialize, Serialize};
use weaver_semconv::aggregation::AggregationSpec;
use weaver_semconv::deprecated::Deprecated;
use weaver_semconv::group::{GroupType, InstrumentSpec, SpanKindSpec};
use weaver_semconv::provenance::Provenance;
Expand Down Expand Up @@ -113,6 +114,12 @@ pub struct Group {
/// Note: This field is required if type is metric.
#[serde(skip_serializing_if = "Option::is_none")]
pub unit: Option<String>,
/// The aggregation which should occur on the data points being captured by a meter.
/// Semconv metrics all use the default aggregation type, hence this option is for
/// providing the parameters of the aggregation.
/// For more details: [Metrics SDK - Aggregation](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#aggregation).
#[serde(skip_serializing_if = "Option::is_none")]
pub aggregation: Option<AggregationSpec>,
/// The name of the event. If not specified, the prefix is used.
/// If prefix is empty (or unspecified), name is required.
#[serde(skip_serializing_if = "Option::is_none")]
Expand Down
1 change: 1 addition & 0 deletions crates/weaver_resolver/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ fn group_from_spec(group: GroupSpecWithProvenance) -> UnresolvedGroup {
metric_name: group.spec.metric_name,
instrument: group.spec.instrument,
unit: group.spec.unit,
aggregation: group.spec.aggregation,
name: group.spec.name,
lineage: Some(GroupLineage::new(group.provenance.clone())),
display_name: group.spec.display_name,
Expand Down
7 changes: 7 additions & 0 deletions crates/weaver_semconv/data/jvm-metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ groups:
brief: "Duration of JVM garbage collection actions."
instrument: histogram
unit: "s"
aggregation:
parameters:
boundaries:
- 0
- 5
- 10
recordMinMax: true
attributes:
- id: jvm.gc.name
stability: stable
Expand Down
25 changes: 25 additions & 0 deletions crates/weaver_semconv/src/aggregation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-License-Identifier: Apache-2.0

//! Aggregation specification.
//!
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::YamlValue;

/// An aggregation specification.
#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct AggregationSpec {
/// The parameters used in the aggregation
#[serde(skip_serializing_if = "Option::is_none")]
pub parameters: Option<HashMap<String, YamlValue>>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed in the tooling call:

bucket boundaries are advisory params, changing them would not be breaking, so we should put them into annotations.

It'd still be useful to define the specific format, but not in the rust code. We can define specific format in the JSON schema, but we can also do it later and start by suggesting the format inside semconv repo.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With having it as advisory, won't you run into issues/inaccurate data if you have multiple instruments generating measurements with different boundaries but using the same metric? Hence it is hard to see it as advisory but more as a requirement. Also, what about the aggregation type? That is also a requirement.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"won't you run into issues/inaccurate data if you have multiple instruments generating measurements with different boundaries but using the same metric"

Not when following best practices for histograms/distributions. in fact, our Exponential histograms are designed around bucket boundaries changing in the lifespan of the same time series.

The OpenTelemetry specification has explicitly allowed bucket boundaries to change during a timeseries lifespan. So "default advise" would not have this restriction, and many metric systems will handle this effectively (even prometheus if using functions like histogram_quantile()).

So, by default, weaver will not enforce this. If someone wanted to enforce this, weaver would ALLOW that via annotations and custom rego policies.

Copy link
Contributor Author

@thompson-tomo thompson-tomo Jul 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This aggregation object is for defining the properties based on the aggregation type being used. The supported settings are described in the spec at https://opentelemetry.io/docs/specs/otel/metrics/sdk/#aggregation

That document gives the impression that if you define explicit boundaries the sdk is instructed to use them.

I will make it more explicit by adding the type in there, that way it is possible to fall back to the defaults. That also provides a way to indicate it is exponential etc.

Note annotations are not emitted by weaver for group members/signal in resolved form.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the delay in responding here. A few important points:

Note annotations are not emitted by weaver for group members/signal in resolved form.

This is a bug that should be fixed.

That document gives the impression that if you define explicit boundaries the sdk is instructed to use them.

Yes but there are two users of semconv here: The storage and visualization that uses the histogram and the instrumentation that produces it.

What we do NOT want is to pretend like there is a perfect set of boundaries in semconv for which all histograms should abide. We can provide a default to codegen. In reality, getting histogram boundaries right often needs specific knowledge of the system being developed. Poor boundaries can lead to inefficient and inaccurate histograms for your services. This is why we have an "advice" API and want histogram boundaries as a "hint" to codegen vs. a first class thing. Additionally, this is why we're moving to exponential histograms, where you can more easily say "here's the resolution I want" and the histogram expands boundaries to fit the appropriate distribution.

Today - we took the approach the code generation + "advice" in Metrics should be a hint in weaver.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we do NOT want is to pretend like there is a perfect set of boundaries in semconv for which all histograms should abide.

Agree it is bespoke to the metric and if not explicitly configured then the default is used which could be the metric default or the global default.

Additionally, this is why we're moving to exponential histograms, where you can more easily say "here's the resolution I want" and the histogram expands boundaries to fit the appropriate distribution.

For the exponential histogram do you see the scale and/or size properties as being general advice or requirements which should be followed?

Also what about aggregation method? Is this a requirement or advice?

}

impl AggregationSpec {
    /// Borrows the optional map of parameters configured for this aggregation.
    ///
    /// The returned reference points at the `parameters` field itself, so the
    /// caller sees `None` whenever the field holds no map.
    #[must_use]
    pub fn parameters(&self) -> &Option<HashMap<String, YamlValue>> {
        let Self { parameters, .. } = self;
        parameters
    }
}
18 changes: 18 additions & 0 deletions crates/weaver_semconv/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fmt::{Display, Formatter};

use crate::aggregation::AggregationSpec;
use crate::any_value::AnyValueSpec;
use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec};
use crate::deprecated::Deprecated;
Expand Down Expand Up @@ -97,6 +98,12 @@ pub struct GroupSpec {
/// Note: This field is required if type is metric.
#[serde(skip_serializing_if = "Option::is_none")]
pub unit: Option<String>,
/// The aggregation which should occur on the data points being captured by a meter.
/// Semconv metrics all use the default aggregation type, hence this option is for
/// providing the parameters of the aggregation.
/// For more details: [Metrics SDK - Aggregation](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#aggregation).
#[serde(skip_serializing_if = "Option::is_none")]
pub aggregation: Option<AggregationSpec>,
/// The name of the event (valid only when the group `type` is `event`).
///
/// Note: If not specified, the prefix is used. If the prefix is empty (or unspecified), the name is required.
Expand Down Expand Up @@ -694,6 +701,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
name: None,
display_name: None,
body: None,
Expand Down Expand Up @@ -859,6 +867,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
name: None,
display_name: None,
body: None,
Expand Down Expand Up @@ -1145,6 +1154,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
display_name: None,
attributes: vec![],
body: Some(AnyValueSpec::String {
Expand Down Expand Up @@ -1360,6 +1370,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
display_name: None,
attributes: vec![],
body: Some(AnyValueSpec::String {
Expand Down Expand Up @@ -1515,6 +1526,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
name: None,
display_name: None,
body: None,
Expand Down Expand Up @@ -1586,6 +1598,7 @@ mod tests {
group.metric_name = Some("test".to_owned());
group.instrument = Some(Counter);
group.unit = Some("test".to_owned());
group.aggregation = None;
let result = group.validate("<test>").into_result_failing_non_fatal();
assert_eq!(
Err(InvalidGroupStability {
Expand Down Expand Up @@ -1685,6 +1698,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
name: None,
display_name: None,
body: None,
Expand Down Expand Up @@ -1776,6 +1790,7 @@ mod tests {
group.metric_name = Some("test".to_owned());
group.instrument = Some(Counter);
group.unit = Some("test".to_owned());
group.aggregation = None;
assert!(group
.validate("<test>")
.into_result_failing_non_fatal()
Expand Down Expand Up @@ -1837,6 +1852,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
name: None,
display_name: None,
body: None,
Expand Down Expand Up @@ -1897,6 +1913,7 @@ mod tests {
metric_name: Some("metric".to_owned()),
instrument: Some(Gauge),
unit: Some("{thing}".to_owned()),
aggregation: None,
name: None,
display_name: None,
body: None,
Expand All @@ -1913,6 +1930,7 @@ mod tests {
group.metric_name = None;
group.instrument = None;
group.unit = None;
group.aggregation = None;
group.span_kind = Some(SpanKindSpec::Client);
assert!(group
.validate("<test>")
Expand Down
1 change: 1 addition & 0 deletions crates/weaver_semconv/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::path::PathBuf;
use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages};
use weaver_common::error::{format_errors, WeaverError};

pub mod aggregation;
pub mod any_value;
pub mod attribute;
pub mod deprecated;
Expand Down
2 changes: 2 additions & 0 deletions crates/weaver_semconv/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
brief: "brief".to_owned(),
note: "note".to_owned(),
extends: None,
Expand Down Expand Up @@ -375,6 +376,7 @@ mod tests {
metric_name: None,
instrument: None,
unit: None,
aggregation: None,
brief: "brief".to_owned(),
note: "note".to_owned(),
extends: None,
Expand Down