From 26617531c27a3e8ce3f404e0172221bdee55d18f Mon Sep 17 00:00:00 2001 From: Roger Coll Date: Tue, 25 Nov 2025 15:10:34 +0100 Subject: [PATCH 1/3] feat: add dynamic data stream routing for connectors --- exporter/elasticsearchexporter/README.md | 2 +- .../elasticsearchexporter/data_stream_router.go | 16 +++++++++++----- .../data_stream_router_test.go | 12 ++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 4a15bf48705db..959033be98d2f 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -114,7 +114,7 @@ The resulting documents will contain the corresponding `data_stream.*` fields, s 2. Otherwise, if a scope attribute with the name `encoding.format` exists and contains a string value, `data_stream.dataset` will be set to this value. Note that while enabled by default, this behaviour is considered experimental. Some encoding extensions set this field (e.g. [awslogsencodingextension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/encoding/awslogsencodingextension)), but it is not yet part of Semantic Conventions. There is the potential that the name of this routing field evolves as the [discussion progresses in SemConv](https://github.com/open-telemetry/semantic-conventions/issues/2854). - 3. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1 + 3. Otherwise, if scope name matches regex `/receiver/(\w+receiver)` or `/connector/(\w+connector)`, `data_stream.dataset` will be capture group #1 4. Otherwise, `data_stream.dataset` falls back to `generic` and `data_stream.namespace` falls back to `default`. 
[^3]: See additional handling in [Document routing exceptions for OTel data mode](#document-routing-exceptions-for-otel-data-mode) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index 1c9ee451316c0..385ebae2cb996 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -15,7 +15,11 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/elasticsearch" ) -var receiverRegex = regexp.MustCompile(`/receiver/(\w+receiver)`) +// var receiverRegex = regexp.MustCompile(`/receiver/(\w+receiver)|/connector/(\w+connector)`) +var componentsRegex = []*regexp.Regexp{ + regexp.MustCompile(`/receiver/(\w+receiver)`), + regexp.MustCompile(`/connector/(\w+connector)`), +} var selfTelemetryScopeNames = map[string]bool{ "go.opentelemetry.io/collector/receiver/receiverhelper": true, @@ -228,13 +232,15 @@ func applyScopeRouting(scope pcommon.InstrumentationScope) (string, bool) { } } - // Receiver-based routing + // {receiver/connector}-based routing // For example, hostmetricsreceiver (or hostmetricsreceiver.otel in the OTel output mode) // for the scope name // github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper - loc := receiverRegex.FindStringSubmatchIndex(scope.Name()) - if len(loc) == 4 { - return scope.Name()[loc[2]:loc[3]], true + for _, componentRegex := range componentsRegex { + loc := componentRegex.FindStringSubmatchIndex(scope.Name()) + if len(loc) == 4 { + return scope.Name()[loc[2]:loc[3]], true + } } return "", false diff --git a/exporter/elasticsearchexporter/data_stream_router_test.go b/exporter/elasticsearchexporter/data_stream_router_test.go index 317db42762554..674f2cf114bec 100644 --- a/exporter/elasticsearchexporter/data_stream_router_test.go +++ b/exporter/elasticsearchexporter/data_stream_router_test.go 
@@ -200,12 +200,24 @@ func TestApplyRouting(t *testing.T) { wantDataset: "hostmetricsreceiver", wantFound: true, }, + { + name: "connector-based routing with spanmetricsconnector", + scopeName: "github.com/open-telemetry/opentelemetry-collector-contrib/connector/spanmetricsconnector", + wantDataset: "spanmetricsconnector", + wantFound: true, + }, { name: "receiver without a receiver name", scopeName: "some.scope.name/receiver/receiver/should/be/ignored", wantDataset: "", wantFound: false, }, + { + name: "connector without a connector name", + scopeName: "some.scope.name/connector/connector/should/be/ignored", + wantDataset: "", + wantFound: false, + }, { name: "otel collector self-telemetry for receivers", scopeName: "go.opentelemetry.io/collector/receiver/receiverhelper", From f74eea6b9a9d303c35bffd720932a3cb8ed5c884 Mon Sep 17 00:00:00 2001 From: Roger Coll Date: Wed, 26 Nov 2025 16:56:14 +0100 Subject: [PATCH 2/3] chore: add changelog --- .chloggen/extend_ds_regex.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .chloggen/extend_ds_regex.yaml diff --git a/.chloggen/extend_ds_regex.yaml b/.chloggen/extend_ds_regex.yaml new file mode 100644 index 0000000000000..cd3073accea8c --- /dev/null +++ b/.chloggen/extend_ds_regex.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'enhancement' + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: exporter/elasticsearch + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: add dynamic data stream routing for connectors + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. 
+issues: [44525] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] From acb7a12089ec89ce0303ec58211f8a8c79992e5b Mon Sep 17 00:00:00 2001 From: Roger Coll Date: Wed, 26 Nov 2025 17:00:12 +0100 Subject: [PATCH 3/3] remove commented regex --- exporter/elasticsearchexporter/data_stream_router.go | 1 - 1 file changed, 1 deletion(-) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index 385ebae2cb996..1cce017c6669d 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -15,7 +15,6 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/elasticsearch" ) -// var receiverRegex = regexp.MustCompile(`/receiver/(\w+receiver)|/connector/(\w+connector)`) var componentsRegex = []*regexp.Regexp{ regexp.MustCompile(`/receiver/(\w+receiver)`), regexp.MustCompile(`/connector/(\w+connector)`),