Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6fb98f5
Add support for detecting synthetic source.
JacksonWeber Aug 4, 2025
24537f6
Update CHANGELOG.md
JacksonWeber Aug 4, 2025
10353ad
Update __init__.py
JacksonWeber Aug 4, 2025
8cfb235
Move const values to a constants file.
JacksonWeber Aug 8, 2025
9db1219
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 11, 2025
4703e5f
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 18, 2025
dddba48
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 19, 2025
1385bce
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 22, 2025
2672bd5
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 25, 2025
4ab76d3
use existing sem conv.
JacksonWeber Aug 25, 2025
a64956a
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Aug 29, 2025
401d59d
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 16, 2025
d29cb64
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 17, 2025
11b557e
Move changes to the http package.
JacksonWeber Sep 17, 2025
19be82c
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 17, 2025
a11b69a
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 22, 2025
6e189c1
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 23, 2025
5283de4
Update util/opentelemetry-util-http/tests/test_detect_synthetic_user_…
JacksonWeber Sep 23, 2025
202c6db
Add synthetic detection on the server side.
JacksonWeber Sep 24, 2025
f44f2fb
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 24, 2025
f04f046
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 24, 2025
c112f99
Fix linting.
JacksonWeber Sep 24, 2025
755f41d
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 24, 2025
be71c94
Update test_asgi_middleware.py
JacksonWeber Sep 24, 2025
c1a971f
Update test_asgi_middleware.py
JacksonWeber Sep 24, 2025
46cbd48
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 25, 2025
30a7951
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 29, 2025
e0cbcee
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 30, 2025
a7513f2
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Sep 30, 2025
cf99868
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Oct 1, 2025
9f31267
Merge branch 'main' into jacksonweber/populate-synthetic-attributes
JacksonWeber Oct 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- `opentelemetry-instrumentation-requests`: Detect synthetic sources on requests.
([#3674](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3674))

### Fixed

- `opentelemetry-instrumentation`: Avoid calls to `context.detach` with `None` token.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,16 @@ def response_hook(span, request_obj, response):
_StabilityMode,
)
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.instrumentation.requests.constants import (
BOT_PATTERNS,
TEST_PATTERNS,
)
from opentelemetry.instrumentation.requests.package import _instruments
from opentelemetry.instrumentation.requests.semconv import (
ATTR_USER_AGENT_SYNTHETIC_TYPE,
USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
)
from opentelemetry.instrumentation.requests.version import __version__
from opentelemetry.instrumentation.utils import (
is_http_instrumentation_enabled,
Expand Down Expand Up @@ -158,6 +167,33 @@ def response_hook(span, request_obj, response):
_ResponseHookT = Optional[Callable[[Span, PreparedRequest, Response], None]]


def _detect_synthetic_user_agent(user_agent: Optional[str]) -> Optional[str]:
    """
    Detect synthetic user agent type based on user agent string contents.

    Matching is a case-insensitive substring search: the user agent is
    lowercased and each pattern is looked for anywhere inside it.

    Args:
        user_agent: The User-Agent header value to analyze. May be ``None``
            or empty when the header is absent. ``bytes`` values are also
            tolerated and are decoded before matching.

    Returns:
        USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST if user agent contains any pattern from TEST_PATTERNS
        USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT if user agent contains any pattern from BOT_PATTERNS
        None otherwise

    Note: Test patterns take priority over bot patterns.
    """
    if not user_agent:
        return None

    # Header values may be supplied as bytes. The patterns are str, so a
    # bytes value would raise TypeError in the containment checks below and
    # break the caller's request. latin-1 maps every byte to a code point,
    # so this decode cannot fail.
    if isinstance(user_agent, bytes):
        user_agent = user_agent.decode("latin-1")

    user_agent_lower = user_agent.lower()

    # Test patterns are checked first so they win when both kinds match.
    if any(pattern in user_agent_lower for pattern in TEST_PATTERNS):
        return USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST
    if any(pattern in user_agent_lower for pattern in BOT_PATTERNS):
        return USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT

    return None


def _set_http_status_code_attribute(
span,
status_code,
Expand Down Expand Up @@ -234,6 +270,9 @@ def get_or_create_headers():

url = redact_url(request.url)

# Get headers early for user agent detection
headers = get_or_create_headers()

span_attributes = {}
_set_http_method(
span_attributes,
Expand All @@ -243,6 +282,12 @@ def get_or_create_headers():
)
_set_http_url(span_attributes, url, sem_conv_opt_in_mode)

# Check for synthetic user agent type
user_agent = headers.get("User-Agent")
synthetic_type = _detect_synthetic_user_agent(user_agent)
if synthetic_type:
span_attributes[ATTR_USER_AGENT_SYNTHETIC_TYPE] = synthetic_type

metric_labels = {}
_set_http_method(
metric_labels,
Expand Down Expand Up @@ -297,7 +342,6 @@ def get_or_create_headers():
if callable(request_hook):
request_hook(span, request)

headers = get_or_create_headers()
inject(headers)

with suppress_http_instrumentation():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Constants for OpenTelemetry requests instrumentation.

This module contains configuration constants and pattern definitions used
by the requests instrumentation for various features like synthetic user
agent detection.
"""

# Test patterns to detect in user agent strings (case-insensitive)
# These patterns indicate synthetic test traffic
# The detector lowercases the user agent and then does a plain substring
# check, so every entry here must itself be lowercase to ever match.
# Test patterns are evaluated before BOT_PATTERNS and win when both match.
TEST_PATTERNS = [
"alwayson",
]

# Bot patterns to detect in user agent strings (case-insensitive)
# These patterns indicate automated bot traffic
# Same matching rules as TEST_PATTERNS: lowercase entries, substring match
# against the lowercased user agent. Only consulted when no test pattern
# matched.
BOT_PATTERNS = [
"googlebot",
"bingbot",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Semantic conventions for user agent synthetic type detection.

This module defines constants for user agent synthetic type attributes and values
according to OpenTelemetry semantic conventions.

**EXPERIMENTAL**: This module contains experimental semantic conventions that are not
yet part of the official OpenTelemetry semantic conventions specification. These
attributes and values may experience breaking changes in future versions as the
specification evolves.

The semantic conventions defined here are used to classify synthetic traffic based
on User-Agent header analysis, helping distinguish between:
- Bot traffic (web crawlers, search engine bots)
- Test traffic (monitoring systems, health checks)
- Regular user traffic

These experimental conventions may be:
1. Modified with different attribute names or values
2. Moved to official semantic convention packages
3. Deprecated in favor of standardized alternatives
4. Changed based on community feedback and specification updates

Users should be prepared for potential breaking changes when upgrading and should
monitor OpenTelemetry specification updates for official semantic convention releases.
"""

# User agent synthetic type attribute
# Span attribute key under which the instrumentation records the detected
# synthetic type; the key is only set when a type was detected.
ATTR_USER_AGENT_SYNTHETIC_TYPE = "user_agent.synthetic.type"

# User agent synthetic type values
# "bot" is reported when the user agent matches a bot pattern, "test" when
# it matches a test pattern.
USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT = "bot"
USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST = "test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import httpretty
import requests

from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.instrumentation.requests.semconv import (
ATTR_USER_AGENT_SYNTHETIC_TYPE,
USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
)
from opentelemetry.test.test_base import TestBase


class TestUserAgentSynthetic(TestBase):
    """Checks that the requests instrumentation tags spans for synthetic user agents.

    Each test issues a GET against a mocked URL with a chosen User-Agent
    header and inspects the single finished span for the synthetic type
    attribute.
    """

    URL = "http://mock/status/200"

    def setUp(self):
        super().setUp()
        RequestsInstrumentor().instrument()
        httpretty.enable()
        httpretty.register_uri(httpretty.GET, self.URL, body="Hello!")

    def tearDown(self):
        super().tearDown()
        RequestsInstrumentor().uninstrument()
        httpretty.disable()

    def assert_span(self, num_spans=1):
        """Assert the number of finished spans and return them.

        Returns None for zero spans, the span itself for exactly one, and
        the full collection otherwise.
        """
        finished = self.memory_exporter.get_finished_spans()
        self.assertEqual(num_spans, len(finished))
        if num_spans == 0:
            return None
        return finished[0] if num_spans == 1 else finished

    def _span_for(self, headers=None):
        """Send one GET to the mocked URL and return the resulting span."""
        request_kwargs = {"timeout": 5}
        # Only pass headers when the test supplied some, so the header-less
        # case goes through requests with its defaults untouched.
        if headers is not None:
            request_kwargs["headers"] = headers
        requests.get(self.URL, **request_kwargs)
        return self.assert_span()

    def _assert_synthetic_type(self, user_agent, expected):
        """Send a request with ``user_agent`` and check the recorded type."""
        recorded_span = self._span_for({"User-Agent": user_agent})
        self.assertEqual(
            recorded_span.attributes.get(ATTR_USER_AGENT_SYNTHETIC_TYPE),
            expected,
        )

    def test_user_agent_bot_googlebot(self):
        """A Googlebot user agent is classified as 'bot'"""
        self._assert_synthetic_type(
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
        )

    def test_user_agent_bot_bingbot(self):
        """A bingbot user agent is classified as 'bot'"""
        self._assert_synthetic_type(
            "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
        )

    def test_user_agent_test_alwayson(self):
        """An AlwaysOn user agent is classified as 'test'"""
        self._assert_synthetic_type(
            "AlwaysOn-Monitor/1.0",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
        )

    def test_user_agent_case_insensitive(self):
        """Upper-case user agents are still detected"""
        self._assert_synthetic_type(
            "GOOGLEBOT/2.1",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
        )

        # Drop the first span so the next check sees exactly one span again.
        self.memory_exporter.clear()

        self._assert_synthetic_type(
            "ALWAYSON-Monitor/1.0",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
        )

    def test_user_agent_normal_browser(self):
        """A regular browser user agent gets no synthetic type"""
        browser_span = self._span_for(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }
        )
        self.assertNotIn(
            ATTR_USER_AGENT_SYNTHETIC_TYPE, browser_span.attributes
        )

    def test_no_user_agent_header(self):
        """A request sent without explicit headers gets no synthetic type"""
        plain_span = self._span_for()
        self.assertNotIn(ATTR_USER_AGENT_SYNTHETIC_TYPE, plain_span.attributes)

    def test_empty_user_agent_header(self):
        """An empty user agent gets no synthetic type"""
        empty_span = self._span_for({"User-Agent": ""})
        self.assertNotIn(ATTR_USER_AGENT_SYNTHETIC_TYPE, empty_span.attributes)

    def test_user_agent_substring_match(self):
        """Patterns are detected anywhere inside the user agent"""
        # googlebot in the middle of the string
        self._assert_synthetic_type(
            "MyApp/1.0 googlebot crawler",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_BOT,
        )

        # Drop the first span so the next check sees exactly one span again.
        self.memory_exporter.clear()

        # alwayson in the middle of the string
        self._assert_synthetic_type(
            "TestFramework/1.0 alwayson monitoring",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
        )

    def test_user_agent_priority_alwayson_over_bot(self):
        """When both kinds of pattern match, 'test' wins over 'bot'"""
        # alwayson is expected to be checked first, yielding 'test'
        self._assert_synthetic_type(
            "alwayson-googlebot/1.0",
            USER_AGENT_SYNTHETIC_TYPE_VALUE_TEST,
        )