Skip to content

Commit 202c6db

Browse files
committed
Add synthetic user-agent detection to the server-side (ASGI and WSGI) instrumentations.
1 parent 5283de4 commit 202c6db

File tree

4 files changed

+230
-1
lines changed

4 files changed

+230
-1
lines changed

instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ def client_response_hook(span: Span, scope: Scope, message: dict[str, Any]):
258258
from opentelemetry.instrumentation.utils import _start_internal_or_server_span
259259
from opentelemetry.metrics import get_meter
260260
from opentelemetry.propagators.textmap import Getter, Setter
261+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
262+
USER_AGENT_SYNTHETIC_TYPE,
263+
)
261264
from opentelemetry.semconv._incubating.metrics.http_metrics import (
262265
create_http_server_active_requests,
263266
create_http_server_request_body_size,
@@ -276,6 +279,7 @@ def client_response_hook(span: Span, scope: Scope, message: dict[str, Any]):
276279
ExcludeList,
277280
SanitizeValue,
278281
_parse_url_query,
282+
detect_synthetic_user_agent,
279283
get_custom_headers,
280284
normalise_request_header_name,
281285
normalise_response_header_name,
@@ -397,7 +401,13 @@ def collect_request_attributes(
397401
)
398402
http_user_agent = asgi_getter.get(scope, "user-agent")
399403
if http_user_agent:
400-
_set_http_user_agent(result, http_user_agent[0], sem_conv_opt_in_mode)
404+
user_agent_value = http_user_agent[0]
405+
_set_http_user_agent(result, user_agent_value, sem_conv_opt_in_mode)
406+
407+
# Check for synthetic user agent type
408+
synthetic_type = detect_synthetic_user_agent(user_agent_value)
409+
if synthetic_type:
410+
result[USER_AGENT_SYNTHETIC_TYPE] = synthetic_type
401411

402412
if "client" in scope and scope["client"] is not None:
403413
_set_http_peer_ip_server(

instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
HistogramDataPoint,
4343
NumberDataPoint,
4444
)
45+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
46+
USER_AGENT_SYNTHETIC_TYPE,
47+
)
4548
from opentelemetry.semconv.attributes.client_attributes import (
4649
CLIENT_ADDRESS,
4750
CLIENT_PORT,
@@ -883,6 +886,133 @@ def update_expected_user_agent(expected):
883886
new_sem_conv=True,
884887
)
885888

889+
async def test_user_agent_synthetic_bot_detection(self):
    """Crawler-style user agents must be reported with synthetic type "bot".

    Every sample header value is sent through a freshly built
    ``OpenTelemetryMiddleware``; the captured output is then validated
    with the user agent and ``USER_AGENT_SYNTHETIC_TYPE`` merged into the
    expected attributes.
    """
    bot_user_agents = (
        b"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        b"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
        b"googlebot/1.0",
        b"bingbot/1.0",
    )

    for raw_agent in bot_user_agents:
        with self.subTest(user_agent=raw_agent):
            # Start from a header list that holds only this user agent.
            self.scope["headers"] = [[b"user-agent", raw_agent]]

            extra_attributes = {
                SpanAttributes.HTTP_USER_AGENT: raw_agent.decode("utf8"),
                USER_AGENT_SYNTHETIC_TYPE: "bot",
            }

            def add_bot_attributes(expected, extra=extra_attributes):
                # Merge the additional attributes into the fourth
                # expected entry and hand the list back to the validator.
                expected[3]["attributes"].update(extra)
                return expected

            middleware = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
            self.seed_app(middleware)
            await self.send_default_request()
            captured = await self.get_all_output()
            self.validate_outputs(captured, modifiers=[add_bot_attributes])
922+
923+
async def test_user_agent_synthetic_test_detection(self):
    """"alwayson"-style user agents must be reported with synthetic type "test".

    The samples cover lower-case, mixed-case and embedded occurrences of
    the marker. Each one is sent through a freshly built
    ``OpenTelemetryMiddleware`` and the output is validated with the user
    agent and ``USER_AGENT_SYNTHETIC_TYPE`` merged into the expected
    attributes.
    """
    test_user_agents = (
        b"alwayson/1.0",
        b"AlwaysOn/2.0",
        b"test-alwayson-client",
    )

    for raw_agent in test_user_agents:
        with self.subTest(user_agent=raw_agent):
            # Start from a header list that holds only this user agent.
            self.scope["headers"] = [[b"user-agent", raw_agent]]

            extra_attributes = {
                SpanAttributes.HTTP_USER_AGENT: raw_agent.decode("utf8"),
                USER_AGENT_SYNTHETIC_TYPE: "test",
            }

            def add_test_attributes(expected, extra=extra_attributes):
                # Merge the additional attributes into the fourth
                # expected entry and hand the list back to the validator.
                expected[3]["attributes"].update(extra)
                return expected

            middleware = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
            self.seed_app(middleware)
            await self.send_default_request()
            captured = await self.get_all_output()
            self.validate_outputs(captured, modifiers=[add_test_attributes])
955+
956+
async def test_user_agent_non_synthetic(self):
    """Ordinary browser and tool user agents must not get a synthetic type.

    Only the user agent itself is merged into the expected attributes, so
    validation is run against an expectation that does not include
    ``USER_AGENT_SYNTHETIC_TYPE``.
    """
    regular_user_agents = (
        b"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        b"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
        b"PostmanRuntime/7.28.4",
        b"curl/7.68.0",
    )

    for raw_agent in regular_user_agents:
        with self.subTest(user_agent=raw_agent):
            # Start from a header list that holds only this user agent.
            self.scope["headers"] = [[b"user-agent", raw_agent]]

            extra_attributes = {
                SpanAttributes.HTTP_USER_AGENT: raw_agent.decode("utf8"),
            }

            def add_plain_user_agent(expected, extra=extra_attributes):
                # Only the user agent is added; no synthetic-type key.
                expected[3]["attributes"].update(extra)
                return expected

            middleware = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
            self.seed_app(middleware)
            await self.send_default_request()
            captured = await self.get_all_output()
            self.validate_outputs(captured, modifiers=[add_plain_user_agent])
989+
990+
async def test_user_agent_synthetic_new_semconv(self):
    """Synthetic detection also applies when validating new semantic conventions.

    A Googlebot user agent is sent once; the expectation is extended with
    ``USER_AGENT_ORIGINAL`` and ``USER_AGENT_SYNTHETIC_TYPE`` and validated
    with ``old_sem_conv=False`` / ``new_sem_conv=True``.
    """
    googlebot_agent = b"Mozilla/5.0 (compatible; Googlebot/2.1)"
    extra_attributes = {
        USER_AGENT_ORIGINAL: googlebot_agent.decode("utf8"),
        USER_AGENT_SYNTHETIC_TYPE: "bot",
    }

    def add_new_semconv_attributes(expected):
        # Merge the additional attributes into the fourth expected entry.
        expected[3]["attributes"].update(extra_attributes)
        return expected

    # The request carries the Googlebot user agent as its only header.
    self.scope["headers"] = [[b"user-agent", googlebot_agent]]

    middleware = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
    self.seed_app(middleware)
    await self.send_default_request()
    captured = await self.get_all_output()
    self.validate_outputs(
        captured,
        modifiers=[add_new_semconv_attributes],
        old_sem_conv=False,
        new_sem_conv=True,
    )
1015+
8861016
async def test_traceresponse_header(self):
8871017
"""Test a traceresponse header is sent when a global propagator is set."""
8881018

instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
258258
HTTP_SERVER_NAME,
259259
HTTP_URL,
260260
)
261+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
262+
USER_AGENT_SYNTHETIC_TYPE,
263+
)
261264
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
262265
from opentelemetry.semconv.metrics import MetricInstruments
263266
from opentelemetry.semconv.metrics.http_metrics import (
@@ -271,6 +274,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
271274
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE,
272275
SanitizeValue,
273276
_parse_url_query,
277+
detect_synthetic_user_agent,
274278
get_custom_headers,
275279
normalise_request_header_name,
276280
normalise_response_header_name,
@@ -391,6 +395,11 @@ def collect_request_attributes(
391395
if user_agent is not None and len(user_agent) > 0:
392396
_set_http_user_agent(result, user_agent, sem_conv_opt_in_mode)
393397

398+
# Check for synthetic user agent type
399+
synthetic_type = detect_synthetic_user_agent(user_agent)
400+
if synthetic_type:
401+
result[USER_AGENT_SYNTHETIC_TYPE] = synthetic_type
402+
394403
flavor = environ.get("SERVER_PROTOCOL", "")
395404
if flavor.upper().startswith(_HTTP_VERSION_PREFIX):
396405
flavor = flavor[len(_HTTP_VERSION_PREFIX) :]

instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
NET_HOST_NAME,
5353
NET_HOST_PORT,
5454
)
55+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
56+
USER_AGENT_SYNTHETIC_TYPE,
57+
)
5558
from opentelemetry.semconv.attributes.http_attributes import (
5659
HTTP_REQUEST_METHOD,
5760
HTTP_RESPONSE_STATUS_CODE,
@@ -791,6 +794,83 @@ def test_http_user_agent_attribute(self):
791794
expected_new.items(),
792795
)
793796

797+
def test_http_user_agent_synthetic_bot_detection(self):
    """Crawler-style ``HTTP_USER_AGENT`` values must yield synthetic type "bot".

    For each sample the collected request attributes must contain the
    unchanged user agent under ``HTTP_USER_AGENT`` and the value ``"bot"``
    under ``USER_AGENT_SYNTHETIC_TYPE``.
    """
    bot_user_agents = (
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
        "googlebot/1.0",
        "bingbot/1.0",
    )

    for agent in bot_user_agents:
        with self.subTest(user_agent=agent):
            self.environ["HTTP_USER_AGENT"] = agent
            collected = otel_wsgi.collect_request_attributes(self.environ)

            # The original header value is still reported as-is ...
            self.assertIn(HTTP_USER_AGENT, collected)
            self.assertEqual(collected[HTTP_USER_AGENT], agent)
            # ... and the request is additionally classified as a bot.
            self.assertIn(USER_AGENT_SYNTHETIC_TYPE, collected)
            self.assertEqual(collected[USER_AGENT_SYNTHETIC_TYPE], "bot")
816+
817+
def test_http_user_agent_synthetic_test_detection(self):
    """"alwayson"-style ``HTTP_USER_AGENT`` values must yield synthetic type "test".

    The samples cover lower-case, mixed-case and embedded occurrences of
    the marker. For each one the collected request attributes must contain
    the unchanged user agent and the value ``"test"`` under
    ``USER_AGENT_SYNTHETIC_TYPE``.
    """
    test_user_agents = (
        "alwayson/1.0",
        "AlwaysOn/2.0",
        "test-alwayson-client",
    )

    for agent in test_user_agents:
        with self.subTest(user_agent=agent):
            self.environ["HTTP_USER_AGENT"] = agent
            collected = otel_wsgi.collect_request_attributes(self.environ)

            # The original header value is still reported as-is ...
            self.assertIn(HTTP_USER_AGENT, collected)
            self.assertEqual(collected[HTTP_USER_AGENT], agent)
            # ... and the request is additionally classified as a test.
            self.assertIn(USER_AGENT_SYNTHETIC_TYPE, collected)
            self.assertEqual(collected[USER_AGENT_SYNTHETIC_TYPE], "test")
835+
836+
def test_http_user_agent_non_synthetic(self):
    """Ordinary browser and tool user agents must not get a synthetic type.

    For each sample the collected request attributes must contain the
    unchanged user agent under ``HTTP_USER_AGENT`` and must not contain
    ``USER_AGENT_SYNTHETIC_TYPE`` at all.
    """
    regular_user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
        "PostmanRuntime/7.28.4",
        "curl/7.68.0",
    )

    for agent in regular_user_agents:
        with self.subTest(user_agent=agent):
            self.environ["HTTP_USER_AGENT"] = agent
            collected = otel_wsgi.collect_request_attributes(self.environ)

            # The user agent is reported, but without a synthetic marker.
            self.assertIn(HTTP_USER_AGENT, collected)
            self.assertEqual(collected[HTTP_USER_AGENT], agent)
            self.assertNotIn(USER_AGENT_SYNTHETIC_TYPE, collected)
854+
855+
def test_http_user_agent_synthetic_new_semconv(self):
    """Synthetic detection also applies when ``_StabilityMode.HTTP`` is passed.

    With that mode the user agent must appear under ``USER_AGENT_ORIGINAL``
    and the Googlebot sample must be classified as ``"bot"`` under
    ``USER_AGENT_SYNTHETIC_TYPE``.
    """
    googlebot_agent = "Mozilla/5.0 (compatible; Googlebot/2.1)"
    self.environ["HTTP_USER_AGENT"] = googlebot_agent

    collected = otel_wsgi.collect_request_attributes(
        self.environ,
        _StabilityMode.HTTP,
    )

    # The user agent is reported under the new attribute name ...
    self.assertIn(USER_AGENT_ORIGINAL, collected)
    self.assertEqual(collected[USER_AGENT_ORIGINAL], googlebot_agent)
    # ... together with the synthetic classification.
    self.assertIn(USER_AGENT_SYNTHETIC_TYPE, collected)
    self.assertEqual(collected[USER_AGENT_SYNTHETIC_TYPE], "bot")
873+
794874
def test_response_attributes(self):
795875
otel_wsgi.add_response_attributes(self.span, "404 Not Found", {})
796876
otel_wsgi.add_response_attributes(

0 commit comments

Comments
 (0)