Skip to content

Commit 11b557e

Browse files
committed
Move changes to the http package.
1 parent d29cb64 commit 11b557e

File tree

4 files changed

+128
-36
lines changed

4 files changed

+128
-36
lines changed

instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,6 @@ def response_hook(span, request_obj, response):
124124
_StabilityMode,
125125
)
126126
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
127-
from opentelemetry.instrumentation.requests.constants import (
128-
BOT_PATTERNS,
129-
TEST_PATTERNS,
130-
)
131127
from opentelemetry.instrumentation.requests.package import _instruments
132128
from opentelemetry.instrumentation.requests.version import __version__
133129
from opentelemetry.instrumentation.utils import (
@@ -138,7 +134,6 @@ def response_hook(span, request_obj, response):
138134
from opentelemetry.propagate import inject
139135
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
140136
USER_AGENT_SYNTHETIC_TYPE,
141-
UserAgentSyntheticTypeValues,
142137
)
143138
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
144139
from opentelemetry.semconv.attributes.network_attributes import (
@@ -153,6 +148,7 @@ def response_hook(span, request_obj, response):
153148
from opentelemetry.trace.span import Span
154149
from opentelemetry.util.http import (
155150
ExcludeList,
151+
detect_synthetic_user_agent,
156152
get_excluded_urls,
157153
parse_excluded_urls,
158154
redact_url,
@@ -166,33 +162,6 @@ def response_hook(span, request_obj, response):
166162
_ResponseHookT = Optional[Callable[[Span, PreparedRequest, Response], None]]
167163

168164

169-
def _detect_synthetic_user_agent(user_agent: str) -> Optional[str]:
170-
"""
171-
Detect synthetic user agent type based on user agent string contents.
172-
173-
Args:
174-
user_agent: The user agent string to analyze
175-
176-
Returns:
177-
UserAgentSyntheticTypeValues.TEST if user agent contains any pattern from TEST_PATTERNS
178-
UserAgentSyntheticTypeValues.BOT if user agent contains any pattern from BOT_PATTERNS
179-
None otherwise
180-
181-
Note: Test patterns take priority over bot patterns.
182-
"""
183-
if not user_agent:
184-
return None
185-
186-
user_agent_lower = user_agent.lower()
187-
188-
if any(test_pattern in user_agent_lower for test_pattern in TEST_PATTERNS):
189-
return UserAgentSyntheticTypeValues.TEST.value
190-
if any(bot_pattern in user_agent_lower for bot_pattern in BOT_PATTERNS):
191-
return UserAgentSyntheticTypeValues.BOT.value
192-
193-
return None
194-
195-
196165
def _set_http_status_code_attribute(
197166
span,
198167
status_code,
@@ -283,7 +252,7 @@ def get_or_create_headers():
283252

284253
# Check for synthetic user agent type
285254
user_agent = headers.get("User-Agent")
286-
synthetic_type = _detect_synthetic_user_agent(user_agent)
255+
synthetic_type = detect_synthetic_user_agent(user_agent)
287256
if synthetic_type:
288257
span_attributes[USER_AGENT_SYNTHETIC_TYPE] = synthetic_type
289258

util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from re import IGNORECASE as RE_IGNORECASE
2020
from re import compile as re_compile
2121
from re import search
22-
from typing import Callable, Iterable, overload
22+
from typing import Callable, Iterable, Optional, overload
2323
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
2424

2525
from opentelemetry.semconv._incubating.attributes.http_attributes import (
@@ -34,6 +34,10 @@
3434
NET_HOST_NAME,
3535
NET_HOST_PORT,
3636
)
37+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
38+
UserAgentSyntheticTypeValues,
39+
)
40+
from opentelemetry.util.http.constants import BOT_PATTERNS, TEST_PATTERNS
3741

3842
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS = (
3943
"OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS"
@@ -301,3 +305,30 @@ def redact_url(url: str) -> str:
301305
url = remove_url_credentials(url)
302306
url = redact_query_parameters(url)
303307
return url
308+
309+
310+
def detect_synthetic_user_agent(user_agent: Optional[str]) -> Optional[str]:
    """
    Classify a user agent string as synthetic traffic, if it matches.

    The user agent is lowercased and then searched for each entry of
    TEST_PATTERNS and BOT_PATTERNS as a plain substring.

    Args:
        user_agent: The user agent string to inspect; None or an empty
            string is accepted and yields None.

    Returns:
        The ``value`` of UserAgentSyntheticTypeValues.TEST when an entry of
        TEST_PATTERNS occurs in the user agent, otherwise the ``value`` of
        UserAgentSyntheticTypeValues.BOT when an entry of BOT_PATTERNS
        occurs, otherwise None.
    """
    if user_agent:
        normalized = user_agent.lower()
        # Order matters: test patterns take priority over bot patterns.
        for patterns, synthetic_type in (
            (TEST_PATTERNS, UserAgentSyntheticTypeValues.TEST),
            (BOT_PATTERNS, UserAgentSyntheticTypeValues.BOT),
        ):
            for pattern in patterns:
                if pattern in normalized:
                    return synthetic_type.value
    return None
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
# limitations under the License.
1414

1515
"""
16-
Constants for OpenTelemetry requests instrumentation.
16+
Constants for OpenTelemetry HTTP utilities.
1717
1818
This module contains configuration constants and pattern definitions used
19-
by the requests instrumentation for various features like synthetic user
19+
by HTTP instrumentation utilities for various features like synthetic user
2020
agent detection.
2121
"""
2222

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright The OpenTelemetry Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
18+
UserAgentSyntheticTypeValues,
19+
)
20+
from opentelemetry.util.http import detect_synthetic_user_agent
21+
22+
23+
class TestDetectSyntheticUserAgent(unittest.TestCase):
    """Tests for ``detect_synthetic_user_agent`` from opentelemetry.util.http."""

    def _assert_bot(self, user_agent):
        """Assert that ``user_agent`` is classified as a bot."""
        self.assertEqual(
            detect_synthetic_user_agent(user_agent),
            UserAgentSyntheticTypeValues.BOT.value,
        )

    def _assert_test(self, user_agent):
        """Assert that ``user_agent`` is classified as test traffic."""
        self.assertEqual(
            detect_synthetic_user_agent(user_agent),
            UserAgentSyntheticTypeValues.TEST.value,
        )

    def test_detect_bot_googlebot(self):
        """A Googlebot user agent is reported as a bot."""
        self._assert_bot(
            "Mozilla/5.0 (compatible; Googlebot/2.1; "
            "+http://www.google.com/bot.html)"
        )

    def test_detect_bot_bingbot(self):
        """A bingbot user agent is reported as a bot."""
        self._assert_bot(
            "Mozilla/5.0 (compatible; bingbot/2.0; "
            "+http://www.bing.com/bingbot.htm)"
        )

    def test_detect_test_alwayson(self):
        """An AlwaysOn monitor user agent is reported as test traffic."""
        self._assert_test("AlwaysOn-Monitor/1.0")

    def test_case_insensitive_detection(self):
        """Upper-case user agents are still matched."""
        self._assert_bot("GOOGLEBOT/2.1")
        self._assert_test("ALWAYSON-Monitor/1.0")

    def test_normal_user_agent_not_detected(self):
        """An ordinary browser user agent is not flagged as synthetic."""
        browser_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        )
        self.assertIsNone(detect_synthetic_user_agent(browser_agent))

    def test_none_user_agent(self):
        """Passing None yields None."""
        self.assertIsNone(detect_synthetic_user_agent(None))

    def test_empty_user_agent(self):
        """Passing an empty string yields None."""
        self.assertIsNone(detect_synthetic_user_agent(""))

    def test_substring_match(self):
        """Patterns are found when embedded in the middle of the string."""
        # googlebot surrounded by other tokens
        self._assert_bot("MyApp/1.0 googlebot crawler")
        # alwayson surrounded by other tokens
        self._assert_test("TestFramework/1.0 alwayson monitoring")

    def test_priority_test_over_bot(self):
        """When both kinds of pattern are present, 'test' wins over 'bot'."""
        # Contains both "alwayson" (test) and "googlebot" (bot).
        self._assert_test("alwayson-googlebot/1.0")
89+
90+
91+
if __name__ == "__main__":
92+
unittest.main()

0 commit comments

Comments
 (0)