Skip to content

Commit 1ab7f12

Browse files
authored
feat: add delay option to CW exporter (#4936)
* feat: add delay option to CW exporter * linting + changelog * fix incorrect support for delay in static jobs * fix changelog
1 parent f2f08fc commit 1ab7f12

File tree

8 files changed

+325
-35
lines changed

8 files changed

+325
-35
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ Main (unreleased)
2424

2525
- Added `send_traceparent` option for `tracing` config to enable traceparent header propagation. (@MyDigitalLife)
2626

27+
- Add `delay` option to `prometheus.exporter.cloudwatch` component to delay scraping of metrics to account for CloudWatch ingestion latency. (@tmeijn)
28+
29+
- Export `yace_.*` metrics from the underlying YACE Exporter to `prometheus.exporter.cloudwatch`. (@tmeijn)
30+
2731
- (_Public Preview_) Additions to `database_observability.mysql` and `database_observability.postgres` components:
2832
- `explain_plans`
2933
- always send an explain plan log message for each query, even skipped or errored queries. (@rgeyer)

docs/sources/reference/components/prometheus/prometheus.exporter.cloudwatch.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ You can configure the `discovery` block one or multiple times to scrape metrics
196196
| `type` | `string` | CloudWatch service alias (`"alb"`, `"ec2"`, etc) or namespace name (`"AWS/EC2"`, `"AWS/S3"`, etc). Refer to [supported-services][] for a complete list. | | yes |
197197
| `custom_tags` | `map(string)` | Custom tags to be added as a list of key / value pairs. When exported to Prometheus format, the label name uses the following format: `custom_tag_{key}`. | `{}` | no |
198198
| `dimension_name_requirements` | `list(string)` | List of metric dimensions to query. Before querying metric values, the total list of metrics is filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. | `{}` | no |
199+
| `delay` | `duration` | Shift the time window of the CloudWatch metrics query back by this duration, to account for CloudWatch ingestion latency. | `0` | no |
199200
| `nil_to_zero` | `bool` | When `true`, `NaN` metric values are converted to 0. Individual metrics can override this value in the [metric][] block. | `true` | no |
200201
| `recently_active_only` | `bool` | Only return metrics that have been active in the last 3 hours. | `false` | no |
201202
| `search_tags` | `map(string)` | List of key / value pairs to use for tag filtering (all must match). The value can be a regular expression. | `{}` | no |
@@ -293,6 +294,7 @@ You can configure the `custom_namespace` block multiple times to scrape metrics
293294
| `namespace` | `string` | CloudWatch metric namespace. | | yes |
294295
| `regions` | `list(string)` | List of AWS regions. | | yes |
295296
| `custom_tags` | `map(string)` | Custom tags to be added as a list of key / value pairs. When exported to Prometheus format, the label name uses the following format: `custom_tag_{key}`. | `{}` | no |
297+
| `delay` | `duration` | Shift the time window of the CloudWatch metrics query back by this duration, to account for CloudWatch ingestion latency. | `0` | no |
296298
| `dimension_name_requirements` | `list(string)` | List of metric dimensions to query. Before querying metric values, the total list of metrics is filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. | `{}` | no |
297299
| `nil_to_zero` | `bool` | When `true`, `NaN` metric values are converted to 0. Individual metrics can override this value in the [metric][] block. | `true` | no |
298300
| `recently_active_only` | `bool` | Only return metrics that have been active in the last 3 hours. | `false` | no |

internal/component/prometheus/exporter/cloudwatch/config.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ type DiscoveryJob struct {
6060
DimensionNameRequirements []string `alloy:"dimension_name_requirements,attr,optional"`
6161
RecentlyActiveOnly bool `alloy:"recently_active_only,attr,optional"`
6262
Metrics []Metric `alloy:"metric,block"`
63+
Delay time.Duration `alloy:"delay,attr,optional"`
6364
//TODO: Remove NilToZero, because it is deprecated upstream.
6465
NilToZero *bool `alloy:"nil_to_zero,attr,optional"`
6566
}
@@ -76,6 +77,7 @@ type StaticJob struct {
7677
Namespace string `alloy:"namespace,attr"`
7778
Dimensions Dimensions `alloy:"dimensions,attr"`
7879
Metrics []Metric `alloy:"metric,block"`
80+
Delay time.Duration `alloy:"delay,attr,optional"`
7981
//TODO: Remove NilToZero, because it is deprecated upstream.
8082
NilToZero *bool `alloy:"nil_to_zero,attr,optional"`
8183
}
@@ -88,6 +90,7 @@ type CustomNamespaceJob struct {
8890
Namespace string `alloy:"namespace,attr"`
8991
RecentlyActiveOnly bool `alloy:"recently_active_only,attr,optional"`
9092
Metrics []Metric `alloy:"metric,block"`
93+
Delay time.Duration `alloy:"delay,attr,optional"`
9194
//TODO: Remove NilToZero, because it is deprecated upstream.
9295
NilToZero *bool `alloy:"nil_to_zero,attr,optional"`
9396
}
@@ -215,7 +218,6 @@ func convertToYACE(a Arguments) (yaceModel.JobsConfig, error) {
215218
if err != nil {
216219
return yaceModel.JobsConfig{}, err
217220
}
218-
cloudwatch_exporter.PatchYACEDefaults(&modelConf)
219221

220222
return modelConf, nil
221223
}
@@ -246,7 +248,7 @@ func toYACEMetrics(ms []Metric, jobNilToZero *bool) []*yaceConf.Metric {
246248
for _, m := range ms {
247249
periodSeconds := int64(m.Period.Seconds())
248250
lengthSeconds := periodSeconds
249-
// If length is other than zero, that is, is configured, override the default period vaue
251+
// If length is other than zero, that is, it is configured, override the default period value
250252
if m.Length != 0 {
251253
lengthSeconds = int64(m.Length.Seconds())
252254
}
@@ -266,10 +268,6 @@ func toYACEMetrics(ms []Metric, jobNilToZero *bool) []*yaceConf.Metric {
266268
Period: periodSeconds,
267269
Length: lengthSeconds,
268270

269-
// Delay moves back the time window for which data is requested from CloudWatch. Since we are already adjusting
270-
// this with RoundingPeriod (see toYACEDiscoveryJob), we should omit this setting.
271-
Delay: 0,
272-
273271
NilToZero: nilToZero,
274272
AddCloudwatchTimestamp: m.AddCloudwatchTimestamp,
275273
})
@@ -316,7 +314,10 @@ func toYACEDiscoveryJob(rj DiscoveryJob) *yaceConf.Job {
316314
// metrics, with the smallest period in the retrieved batch.
317315
RoundingPeriod: nil,
318316
RecentlyActiveOnly: rj.RecentlyActiveOnly,
319-
Metrics: toYACEMetrics(rj.Metrics, nilToZero),
317+
JobLevelMetricFields: yaceConf.JobLevelMetricFields{
318+
Delay: int64(rj.Delay.Seconds()),
319+
},
320+
Metrics: toYACEMetrics(rj.Metrics, nilToZero),
320321
}
321322
return job
322323
}
@@ -337,7 +338,10 @@ func toYACECustomNamespaceJob(cn CustomNamespaceJob) *yaceConf.CustomNamespace {
337338
// metrics, with the smallest period in the retrieved batch.
338339
RoundingPeriod: nil,
339340
RecentlyActiveOnly: cn.RecentlyActiveOnly,
340-
Metrics: toYACEMetrics(cn.Metrics, nilToZero),
341+
JobLevelMetricFields: yaceConf.JobLevelMetricFields{
342+
Delay: int64(cn.Delay.Seconds()),
343+
},
344+
Metrics: toYACEMetrics(cn.Metrics, nilToZero),
341345
}
342346
}
343347

internal/component/prometheus/exporter/cloudwatch/config_test.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,65 @@ custom_namespace "customEC2Metrics" {
222222
}
223223
`
224224

225+
const discoveryJobDelayConfig = `
226+
sts_region = "us-east-2"
227+
debug = true
228+
discovery {
229+
type = "AWS/EC2"
230+
regions = ["us-east-2"]
231+
delay = "2m"
232+
metric {
233+
name = "CPUUtilization"
234+
statistics = ["Average"]
235+
period = "5m"
236+
}
237+
metric {
238+
name = "NetworkIn"
239+
statistics = ["Sum"]
240+
period = "5m"
241+
}
242+
}
243+
`
244+
245+
const staticJobDelayConfig = `
246+
sts_region = "us-east-2"
247+
debug = true
248+
static "test_instance" {
249+
regions = ["us-east-2"]
250+
namespace = "AWS/EC2"
251+
dimensions = {
252+
"InstanceId" = "i-test",
253+
}
254+
metric {
255+
name = "CPUUtilization"
256+
statistics = ["Average"]
257+
period = "5m"
258+
}
259+
}
260+
`
261+
262+
const customNamespaceDelayConfig = `
263+
sts_region = "eu-west-1"
264+
265+
custom_namespace "testMetrics" {
266+
namespace = "TestMetrics"
267+
regions = ["us-east-1"]
268+
delay = "30s"
269+
270+
metric {
271+
name = "metric1"
272+
statistics = ["Average"]
273+
period = "1m"
274+
}
275+
276+
metric {
277+
name = "metric2"
278+
statistics = ["Sum"]
279+
period = "1m"
280+
}
281+
}
282+
`
283+
225284
func TestCloudwatchComponentConfig(t *testing.T) {
226285
type testcase struct {
227286
raw string
@@ -560,6 +619,110 @@ func TestCloudwatchComponentConfig(t *testing.T) {
560619
},
561620
},
562621
},
622+
"discovery job with delay": {
623+
raw: discoveryJobDelayConfig,
624+
expected: yaceModel.JobsConfig{
625+
StsRegion: "us-east-2",
626+
DiscoveryJobs: []yaceModel.DiscoveryJob{
627+
{
628+
Regions: []string{"us-east-2"},
629+
Roles: []yaceModel.Role{{}},
630+
Type: "AWS/EC2",
631+
SearchTags: []yaceModel.SearchTag{},
632+
CustomTags: []yaceModel.Tag{},
633+
Metrics: []*yaceModel.MetricConfig{
634+
{
635+
Name: "CPUUtilization",
636+
Statistics: []string{"Average"},
637+
Period: 300,
638+
Length: 300,
639+
Delay: 120, // 2 minutes
640+
NilToZero: defaultNilToZero,
641+
},
642+
{
643+
Name: "NetworkIn",
644+
Statistics: []string{"Sum"},
645+
Period: 300,
646+
Length: 300,
647+
Delay: 120, // 2 minutes
648+
NilToZero: defaultNilToZero,
649+
},
650+
},
651+
RoundingPeriod: nil,
652+
ExportedTagsOnMetrics: []string{},
653+
DimensionsRegexps: []yaceModel.DimensionsRegexp{
654+
{
655+
Regexp: regexp.MustCompile("instance/(?P<InstanceId>[^/]+)"),
656+
DimensionsNames: []string{"InstanceId"},
657+
},
658+
},
659+
},
660+
},
661+
},
662+
},
663+
"static job with delay": {
664+
raw: staticJobDelayConfig,
665+
expected: yaceModel.JobsConfig{
666+
StsRegion: "us-east-2",
667+
StaticJobs: []yaceModel.StaticJob{
668+
{
669+
Name: "test_instance",
670+
Roles: []yaceModel.Role{{}},
671+
Regions: []string{"us-east-2"},
672+
Namespace: "AWS/EC2",
673+
CustomTags: []yaceModel.Tag{},
674+
Dimensions: []yaceModel.Dimension{
675+
{
676+
Name: "InstanceId",
677+
Value: "i-test",
678+
},
679+
},
680+
Metrics: []*yaceModel.MetricConfig{{
681+
Name: "CPUUtilization",
682+
Statistics: []string{"Average"},
683+
Period: 300,
684+
Length: 300,
685+
Delay: 0, // Delay not supported for static jobs
686+
NilToZero: defaultNilToZero,
687+
}},
688+
},
689+
},
690+
},
691+
},
692+
"custom namespace job with delay": {
693+
raw: customNamespaceDelayConfig,
694+
expected: yaceModel.JobsConfig{
695+
StsRegion: "eu-west-1",
696+
CustomNamespaceJobs: []yaceModel.CustomNamespaceJob{
697+
{
698+
Name: "testMetrics",
699+
Regions: []string{"us-east-1"},
700+
Roles: []yaceModel.Role{{}},
701+
CustomTags: []yaceModel.Tag{},
702+
Namespace: "TestMetrics",
703+
Metrics: []*yaceModel.MetricConfig{
704+
{
705+
Name: "metric1",
706+
Statistics: []string{"Average"},
707+
Period: 60,
708+
Length: 60,
709+
Delay: 30, // 30 seconds
710+
NilToZero: defaultNilToZero,
711+
},
712+
{
713+
Name: "metric2",
714+
Statistics: []string{"Sum"},
715+
Period: 60,
716+
Length: 60,
717+
Delay: 30, // 30 seconds
718+
NilToZero: defaultNilToZero,
719+
},
720+
},
721+
RoundingPeriod: nil,
722+
},
723+
},
724+
},
725+
},
563726
} {
564727
t.Run(name, func(t *testing.T) {
565728
args := Arguments{}

internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ func (e *exporter) MetricsHandler() (http.Handler, error) {
7272
defer e.cachingClientFactory.Clear()
7373

7474
reg := prometheus.NewRegistry()
75+
for _, metric := range yace.Metrics {
76+
if err := reg.Register(metric); err != nil {
77+
e.logger.Debug("Could not register cloudwatch api metric")
78+
}
79+
}
7580
err := yace.UpdateMetrics(
7681
context.Background(),
7782
e.logger,

internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter_decoupled.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ func (e *asyncExporter) scrape(ctx context.Context) {
100100
defer e.cachingClientFactory.Clear()
101101

102102
reg := prometheus.NewRegistry()
103+
for _, metric := range yace.Metrics {
104+
if err := reg.Register(metric); err != nil {
105+
e.logger.Debug("Could not register cloudwatch api metric")
106+
}
107+
}
103108
err := yace.UpdateMetrics(
104109
ctx,
105110
e.logger,

internal/static/integrations/cloudwatch_exporter/config.go

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,12 @@ type TagsPerNamespace map[string][]string
7070
type DiscoveryJob struct {
7171
InlineRegionAndRoles `yaml:",inline"`
7272
InlineCustomTags `yaml:",inline"`
73-
SearchTags []Tag `yaml:"search_tags"`
74-
Type string `yaml:"type"`
75-
DimensionNameRequirements []string `yaml:"dimension_name_requirements"`
76-
Metrics []Metric `yaml:"metrics"`
77-
NilToZero *bool `yaml:"nil_to_zero,omitempty"`
73+
SearchTags []Tag `yaml:"search_tags"`
74+
Type string `yaml:"type"`
75+
DimensionNameRequirements []string `yaml:"dimension_name_requirements"`
76+
Metrics []Metric `yaml:"metrics"`
77+
Delay time.Duration `yaml:"delay,omitempty"`
78+
NilToZero *bool `yaml:"nil_to_zero,omitempty"`
7879
}
7980

8081
// StaticJob will scrape metrics that match all defined dimensions.
@@ -231,28 +232,10 @@ func toYACEConfig(c *Config) (yaceModel.JobsConfig, bool, error) {
231232
if err != nil {
232233
return yaceModel.JobsConfig{}, fipsEnabled, err
233234
}
234-
PatchYACEDefaults(&modelConf)
235235

236236
return modelConf, fipsEnabled, nil
237237
}
238238

239-
// PatchYACEDefaults overrides some default values YACE applies after validation.
240-
func PatchYACEDefaults(yc *yaceModel.JobsConfig) {
241-
// YACE doesn't allow during validation a zero-delay in each metrics scrape. Override this behaviour since it's taken
242-
// into account by the rounding period.
243-
// https://github.com/prometheus-community/yet-another-cloudwatch-exporter/blob/7e5949124bb5f26353eeff298724a5897de2a2a4/pkg/config/config.go#L320
244-
for _, job := range yc.DiscoveryJobs {
245-
for _, metric := range job.Metrics {
246-
metric.Delay = 0
247-
}
248-
}
249-
for _, staticConf := range yc.StaticJobs {
250-
for _, metric := range staticConf.Metrics {
251-
metric.Delay = 0
252-
}
253-
}
254-
}
255-
256239
func toYACEStaticJob(job StaticJob) *yaceConf.Static {
257240
nilToZero := job.NilToZero
258241
if nilToZero == nil {
@@ -298,6 +281,9 @@ func toYACEDiscoveryJob(job *DiscoveryJob) *yaceConf.Job {
298281
// By setting RoundingPeriod to nil, the exporter will align the start and end times for retrieving CloudWatch
299282
// metrics, with the smallest period in the retrieved batch.
300283
RoundingPeriod: nil,
284+
JobLevelMetricFields: yaceConf.JobLevelMetricFields{
285+
Delay: int64(job.Delay.Seconds()),
286+
},
301287
}
302288
return &yaceJob
303289
}
@@ -328,10 +314,6 @@ func toYACEMetrics(metrics []Metric, jobNilToZero *bool) []*yaceConf.Metric {
328314
Period: periodSeconds,
329315
Length: lengthSeconds,
330316

331-
// Delay moves back the time window for which data is requested from CloudWatch. Since we are already adjusting
332-
// this with RoundingPeriod (see toYACEDiscoveryJob), we should omit this setting.
333-
Delay: 0,
334-
335317
NilToZero: nilToZero,
336318
AddCloudwatchTimestamp: &addCloudwatchTimestamp,
337319
})

0 commit comments

Comments
 (0)