Skip to content

Commit aef9425

Browse files
authored
[receiver/prometheusreceiver] fix stalness tracking (open-telemetry#43925)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> #### Description In the dependency upgrade open-telemetry#43890 there was the PR prometheus/prometheus#16429 which changed the provisions for staleness tracking. Now the code only does the tracking if the series was successfully appended in storage. This is indicated by a non zero storage reference returned by the appender. Since we used to return 0 in all cases, the staleness tracking is now broken. Solution is to return a fake reference of 1, just to indicate success. Alternative solution would be to make a unique reference for each series label set, but that incurs an overhead which we can avoid. Performance impact: probably small as the cache should be stable over time - unless targets churn a lot between receiver instances. Resource utilization: +1 hashmap with series identifier (name+labels) pointing to cache entries. Size related to number of series. `addRef` here: https://github.com/prometheus/prometheus/blob/c8f1de18a7870e5dc99ed50119c194691c41e41c/scrape/scrape.go#L1089 <!-- Issue number (e.g. open-telemetry#1234) or full URL to issue, if applicable. --> #### Link to tracking issue Fixes open-telemetry#43893 <!--Describe what testing was performed and which tests were added.--> #### Testing Un-skip related tests. --------- Signed-off-by: György Krajcsovits <[email protected]>
1 parent fb43d28 commit aef9425

File tree

6 files changed

+40
-6
lines changed

6 files changed

+40
-6
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
7+
component: receiver/prometheus
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Fix missing staleness tracking leading to missing no recorded value data points.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [43893]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

exporter/prometheusexporter/end_to_end_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ func TestEndToEndSummarySupport(t *testing.T) {
138138
`test_scrape_samples_scraped.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 13 .*`,
139139
`. HELP test_scrape_series_added The approximate number of new series in this scrape`,
140140
`. TYPE test_scrape_series_added gauge`,
141-
`test_scrape_series_added.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 13 .*`,
141+
`test_scrape_series_added.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". (0|13) .*`,
142142
`. HELP test_up The scraping was successful`,
143143
`. TYPE test_up gauge`,
144144
`test_up.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 1 .*`,

receiver/prometheusreceiver/internal/staleness_end_to_end_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ import (
4343
// Prometheus remotewrite exporter that staleness markers are emitted per timeseries.
4444
// See https://github.com/open-telemetry/opentelemetry-collector/issues/3413
4545
func TestStalenessMarkersEndToEnd(t *testing.T) {
46-
t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved")
4746
if testing.Short() {
4847
t.Skip("This test can take a long time")
4948
}

receiver/prometheusreceiver/internal/transaction.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,14 @@ func (t *transaction) Append(_ storage.SeriesRef, ls labels.Labels, atMs int64,
187187
err = curMF.addSeries(seriesRef, metricName, ls, atMs, val)
188188
if err != nil {
189189
t.logger.Warn("failed to add datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls))
190+
// never return errors, as that fails the while scrape
191+
// return ref==0 indicating that the series was not added
192+
return 0, nil
190193
}
191194

192-
return 0, nil // never return errors, as that fails the whole scrape
195+
// never return errors, as that fails the whole scrape
196+
// return ref==1 indicating that the series was added and needs staleness tracking
197+
return 1, nil
193198
}
194199

195200
// detectAndStoreNativeHistogramStaleness returns true if it detects
@@ -350,9 +355,14 @@ func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atM
350355
}
351356
if err != nil {
352357
t.logger.Warn("failed to add histogram datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls))
358+
// never return errors, as that fails the while scrape
359+
// return ref==0 indicating that the series was not added
360+
return 0, nil
353361
}
354362

355-
return 0, nil // never return errors, as that fails the whole scrape
363+
// never return errors, as that fails the whole scrape
364+
// return ref==1 indicating that the series was added and needs staleness tracking
365+
return 1, nil
356366
}
357367

358368
func (t *transaction) AppendCTZeroSample(_ storage.SeriesRef, ls labels.Labels, atMs, ctMs int64) (storage.SeriesRef, error) {

receiver/prometheusreceiver/metrics_receiver_non_numerical_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ var totalScrapes = 10
4646

4747
// TestStaleNaNs validates that staleness marker gets generated when the timeseries is no longer present
4848
func TestStaleNaNs(t *testing.T) {
49-
t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved")
5049
var mockResponses []mockPrometheusResponse
5150
for i := range totalScrapes {
5251
if i%2 == 0 {

receiver/prometheusreceiver/metrics_receiver_protobuf_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,6 @@ func TestNativeVsClassicHistogramScrapeViaProtobuf(t *testing.T) {
529529
}
530530

531531
func TestStaleExponentialHistogram(t *testing.T) {
532-
t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved")
533532
mf := &dto.MetricFamily{
534533
Name: "test_counter",
535534
Type: dto.MetricType_COUNTER,

0 commit comments

Comments
 (0)