Skip to content

Commit bf0898c

Browse files
committed
RFC 1076: fallback on http.endpoint for schema sampler
1 parent 98a31cb commit bf0898c

File tree

10 files changed

+282
-10
lines changed

10 files changed

+282
-10
lines changed

appsec/src/extension/ddtrace.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ static zend_string *(*_ddtrace_ip_extraction_find)(zval *server);
5959

6060
static struct telemetry_rc_info (*_ddtrace_get_telemetry_rc_info)(void);
6161
static void *(*nullable _ddtrace_emit_asm_event)(void);
62+
static zend_string *(*nullable _ddtrace_guess_endpoint_from_url)(
63+
const char *nonnull url, size_t url_len);
6264

6365
static void _test_ddtrace_metric_register_buffer(
6466
zend_string *nonnull name, ddtrace_metric_type type, ddtrace_metric_ns ns);
@@ -115,6 +117,8 @@ static void dd_trace_load_symbols(zend_module_entry *module)
115117
ddtrace_metric_register_buffer, "ddtrace_metric_register_buffer");
116118
ASSIGN_DLSYM(ddtrace_metric_add_point, "ddtrace_metric_add_point");
117119
ASSIGN_DLSYM(_ddtrace_emit_asm_event, "ddtrace_emit_asm_event");
120+
ASSIGN_DLSYM(
121+
_ddtrace_guess_endpoint_from_url, "ddtrace_guess_endpoint_from_url");
118122

119123
if (manually_opened) {
120124
dlclose(handle);
@@ -462,6 +466,16 @@ void dd_trace_emit_asm_event(void)
462466
_asm_event_emitted = true;
463467
}
464468

469+
zend_string *dd_trace_guess_endpoint_from_url(
470+
const char *nonnull url, size_t url_len)
471+
{
472+
if (UNEXPECTED(_ddtrace_guess_endpoint_from_url == NULL)) {
473+
return NULL;
474+
}
475+
476+
return _ddtrace_guess_endpoint_from_url(url, url_len);
477+
}
478+
465479
static PHP_FUNCTION(datadog_appsec_testing_ddtrace_rshutdown)
466480
{
467481
if (zend_parse_parameters_none() == FAILURE) {

appsec/src/extension/ddtrace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ struct telemetry_rc_info {
9898
};
9999
struct telemetry_rc_info dd_trace_get_telemetry_rc_info(void);
100100

101+
zend_string *nullable dd_trace_guess_endpoint_from_url(
102+
const char *nonnull url, size_t url_len);
103+
101104
typedef enum {
102105
DDTRACE_METRIC_TYPE_GAUGE,
103106
DDTRACE_METRIC_TYPE_COUNT,

appsec/src/extension/request_lifecycle.c

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -942,24 +942,67 @@ static uint64_t _calc_sampling_key(zend_object *root_span, int status_code)
942942
return 0;
943943
}
944944

945+
zend_string *route_or_endpoint = NULL;
946+
bool free_route_or_endpoint = false;
947+
945948
zval *route = zend_hash_str_find(Z_ARRVAL_P(meta), ZEND_STRL("http.route"));
946-
if (!route || Z_TYPE_P(route) != IS_STRING) {
947-
mlog_g(dd_log_debug, "No http.route tag; not sampling");
948-
return 0;
949+
const bool has_http_route = route && Z_TYPE_P(route) == IS_STRING;
950+
if (has_http_route) {
951+
route_or_endpoint = Z_STR_P(route);
952+
} else {
953+
// http.route is absent, check for http.endpoint
954+
zval *endpoint =
955+
zend_hash_str_find(Z_ARRVAL_P(meta), ZEND_STRL("http.endpoint"));
956+
const bool has_http_endpoint =
957+
endpoint && Z_TYPE_P(endpoint) == IS_STRING;
958+
if (has_http_endpoint) {
959+
// http.endpoint is already computed
960+
// Use it unless status code is 404
961+
#define HTTP_NOT_FOUND 404
962+
if (status_code == HTTP_NOT_FOUND) {
963+
mlog_g(dd_log_debug,
964+
"http.endpoint present but status is 404; not sampling");
965+
} else {
966+
route_or_endpoint = Z_STR_P(endpoint);
967+
}
968+
} else {
969+
// http.endpoint not computed, compute it now without setting the
970+
// tag
971+
zval *url =
972+
zend_hash_str_find(Z_ARRVAL_P(meta), ZEND_STRL("http.url"));
973+
const bool has_http_url = url && Z_TYPE_P(url) == IS_STRING;
974+
if (has_http_url) {
975+
route_or_endpoint = dd_trace_guess_endpoint_from_url(
976+
Z_STRVAL_P(url), Z_STRLEN_P(url));
977+
if (route_or_endpoint) {
978+
free_route_or_endpoint = true;
979+
} else {
980+
mlog_g(dd_log_debug,
981+
"Failed to compute http.endpoint; not sampling");
982+
}
983+
} else {
984+
mlog_g(dd_log_debug,
985+
"No http.route, http.endpoint, or http.url; not sampling");
986+
}
987+
}
988+
}
989+
990+
if (!route_or_endpoint) {
991+
goto error;
949992
}
950993

951994
zval *method =
952995
zend_hash_str_find(Z_ARRVAL_P(meta), ZEND_STRL("http.method"));
953996
if (!method || Z_TYPE_P(method) != IS_STRING) {
954997
mlog_g(dd_log_debug, "No http.method tag; not sampling");
955-
return 0;
998+
goto error;
956999
}
9571000

958-
// use fnv-1a hash with: <http.route tag> NULL <http.method tag> NULL
1001+
// use fnv-1a hash with: <route_or_endpoint> NULL <http.method tag> NULL
9591002
// status_code
9601003

9611004
uint64_t hash = FNV_OFFSET_BASIS;
962-
hash = _hash_zend_string(hash, Z_STR_P(route));
1005+
hash = _hash_zend_string(hash, route_or_endpoint);
9631006
hash *= FNV_PRIME; // hash NUL byte
9641007
hash = _hash_zend_string(hash, Z_STR_P(method));
9651008
hash *= FNV_PRIME; // hash NUL byte
@@ -970,7 +1013,22 @@ static uint64_t _calc_sampling_key(zend_object *root_span, int status_code)
9701013
snprintf(status_str, sizeof(status_str), "%d", status_code);
9711014
hash = _hash_string(hash, status_str, status_len);
9721015

1016+
mlog_g(dd_log_debug,
1017+
"Will use sampling key: %" PRIu64
1018+
" for route/endpoint %.*s, method %.*s, status %d",
1019+
hash, (int)ZSTR_LEN(route_or_endpoint), ZSTR_VAL(route_or_endpoint),
1020+
(int)ZSTR_LEN(Z_STR_P(method)), ZSTR_VAL(Z_STR_P(method)), status_code);
1021+
1022+
if (free_route_or_endpoint) {
1023+
zend_string_release(route_or_endpoint);
1024+
}
9731025
return hash;
1026+
1027+
error:
1028+
if (free_route_or_endpoint) {
1029+
zend_string_release(route_or_endpoint);
1030+
}
1031+
return 0;
9741032
}
9751033

9761034
PHP_FUNCTION(datadog_appsec_testing_dump_req_lifecycle_state)

appsec/tests/integration/build.gradle

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -489,9 +489,11 @@ task saveCaches(type: Exec) {
489489
}
490490

491491
if (hasProperty('buildScan')) {
492-
buildScan {
493-
termsOfServiceUrl = 'https://gradle.com/terms-of-service'
494-
termsOfServiceAgree = 'yes'
492+
develocity {
493+
buildScan {
494+
termsOfUseUrl = "https://gradle.com/help/legal-terms-of-use"
495+
termsOfUseAgree = "yes"
496+
}
495497
}
496498
}
497499

appsec/tests/integration/src/test/groovy/com/datadog/appsec/php/integration/Apache2FpmTests.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import static org.testcontainers.containers.Container.ExecResult
1919
@Testcontainers
2020
@Slf4j
2121
@DisabledIf('isZts')
22-
class Apache2FpmTests implements CommonTests, SamplingTestsInFpm {
22+
class Apache2FpmTests implements CommonTests, SamplingTestsInFpm, EndpointFallbackSamplingTests {
2323
static boolean zts = variant.contains('zts')
2424

2525
@Container
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package com.datadog.appsec.php.integration
2+
3+
import com.datadog.appsec.php.docker.AppSecContainer
4+
import com.datadog.appsec.php.model.Trace
5+
import org.junit.jupiter.api.Test
6+
7+
import java.net.http.HttpResponse
8+
9+
trait EndpointFallbackSamplingTests {
10+
11+
/**
12+
* Test Requirement 1: If http.route is present, use it for sampling
13+
* Expected: Schema sampling should work using http.route
14+
*/
15+
@Test
16+
void 'sampling uses http route when present'() {
17+
def trace = container.traceFromRequest('/endpoint_fallback.php?case=with_route') {
18+
HttpResponse<InputStream> resp ->
19+
assert resp.statusCode() == 200
20+
}
21+
assert trace != null
22+
assert trace.first().meta."http.route" == "/users/{id}/profile"
23+
assert trace.first().meta."_dd.appsec.s.res.body" != null // we sampled
24+
25+
trace = container.traceFromRequest('/endpoint_fallback.php?case=with_route') {
26+
HttpResponse<InputStream> resp ->
27+
assert resp.statusCode() == 200
28+
}
29+
assert trace != null
30+
assert trace.first().meta."_dd.appsec.s.res.body" == null // we did not sample again
31+
}
32+
33+
/**
34+
* Test Requirement 2a: If http.route is absent and http.endpoint is present (non-404),
35+
* use http.endpoint for sampling
36+
* Expected: Schema sampling should work using http.endpoint
37+
*/
38+
@Test
39+
void 'sampling uses http endpoint when http route absent and status is not 404'() {
40+
def trace = container.traceFromRequest('/endpoint_fallback.php?case=with_endpoint') {
41+
HttpResponse<InputStream> resp ->
42+
assert resp.statusCode() == 200
43+
}
44+
assert trace != null
45+
46+
assert trace.first().meta."http.route" == null
47+
assert trace.first().meta."http.endpoint" == "/api/products/{param:int}"
48+
assert trace.first().meta."_dd.appsec.s.res.body" != null // sampling happened
49+
50+
trace = container.traceFromRequest('/endpoint_fallback.php?case=with_endpoint') {
51+
HttpResponse<InputStream> resp ->
52+
assert resp.statusCode() == 200
53+
}
54+
assert trace != null
55+
assert trace.first().meta."_dd.appsec.s.res.body" == null // no sampling again
56+
}
57+
58+
/**
59+
* Test Requirement 2b: If http.route is absent and http.endpoint is present but status is 404,
60+
* should NOT sample (failsafe)
61+
* Expected: No schema sampling should occur
62+
*/
63+
@Test
64+
void 'sampling does not use http endpoint when status is 404'() {
65+
def trace = container.traceFromRequest('/endpoint_fallback.php?case=404') {
66+
HttpResponse<InputStream> resp ->
67+
assert resp.statusCode() == 404
68+
}
69+
assert trace != null
70+
assert trace.first().meta."http.route" == null
71+
assert trace.first().meta."http.endpoint" == "/api/notfound/{param:int}"
72+
assert trace.first().meta."_dd.appsec.s.res.body" == null // we did not sample
73+
}
74+
75+
/**
76+
* Test Requirement 3: If neither http.route nor http.endpoint is present,
77+
* compute http.endpoint on-demand and use for sampling, but do NOT set it on the span
78+
* Expected: Schema sampling should work, but http.endpoint should not be in meta
79+
*/
80+
@Test
81+
void 'sampling computes endpoint on-demand when neither route nor endpoint present'() {
82+
def trace = container.traceFromRequest('/endpoint_fallback.php?case=computed') {
83+
HttpResponse<InputStream> resp ->
84+
assert resp.statusCode() == 200
85+
}
86+
assert trace != null
87+
88+
assert trace.first().meta."http.url" != null
89+
assert trace.first().meta."http.url".contains("/endpoint_fallback_computed/users/123/orders/456")
90+
assert trace.first().meta."http.route" == null
91+
assert trace.first().meta."http.endpoint" == null
92+
assert trace.first().meta."_dd.appsec.s.res.body" != null // we did sample
93+
94+
trace = container.traceFromRequest('/endpoint_fallback.php?case=computed') {
95+
HttpResponse<InputStream> resp ->
96+
assert resp.statusCode() == 200
97+
}
98+
assert trace != null
99+
assert trace.first().meta."_dd.appsec.s.res.body" == null
100+
assert trace.first().meta."http.endpoint" == null
101+
}
102+
103+
private AppSecContainer getContainer() {
104+
getClass().container
105+
}
106+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
<?php
2+
$rootSpan = \DDTrace\root_span();
3+
$case = $_GET['case'] ?? 'unknown';
4+
5+
switch ($case) {
6+
case 'with_route':
7+
// Test case: http.route is present - should use it for sampling
8+
$rootSpan->meta["http.route"] = "/users/{id}/profile";
9+
$rootSpan->meta["http.method"] = "GET";
10+
11+
header("Content-Type: application/json");
12+
http_response_code(200);
13+
14+
echo json_encode([
15+
"status" => "ok",
16+
"test_case" => "with_route",
17+
"messages" => ["test", "data"]
18+
]);
19+
break;
20+
21+
case 'with_endpoint':
22+
// Test case: http.route is absent, http.endpoint is present - should use http.endpoint for sampling
23+
// Do NOT set http.route
24+
$rootSpan->meta["http.endpoint"] = "/api/products/{param:int}";
25+
$rootSpan->meta["http.method"] = "GET";
26+
27+
header("Content-Type: application/json");
28+
http_response_code(200);
29+
30+
echo json_encode([
31+
"status" => "ok",
32+
"test_case" => "with_endpoint",
33+
"messages" => ["test", "data"]
34+
]);
35+
break;
36+
37+
case '404':
38+
// Test case: http.route is absent, http.endpoint is present, but status is 404 - should NOT sample
39+
// Do NOT set http.route
40+
$rootSpan->meta["http.endpoint"] = "/api/notfound/{param:int}";
41+
$rootSpan->meta["http.method"] = "GET";
42+
43+
header("Content-Type: application/json");
44+
http_response_code(404);
45+
46+
echo json_encode([
47+
"status" => "error",
48+
"test_case" => "404_with_endpoint",
49+
"message" => "Not found"
50+
]);
51+
break;
52+
53+
case 'computed':
54+
// Test case: Neither http.route nor http.endpoint set - should compute endpoint on-demand
55+
// The endpoint should be computed but NOT added as a tag on the span
56+
// Do NOT set http.route or http.endpoint
57+
// Set http.url so endpoint can be computed
58+
$rootSpan->meta["http.url"] = "http://localhost:8080/endpoint_fallback_computed/users/123/orders/456";
59+
$rootSpan->meta["http.method"] = "GET";
60+
61+
header("Content-Type: application/json");
62+
http_response_code(200);
63+
64+
echo json_encode([
65+
"status" => "ok",
66+
"test_case" => "computed_on_demand",
67+
"messages" => ["test", "data"],
68+
]);
69+
break;
70+
71+
default:
72+
header("Content-Type: application/json");
73+
http_response_code(400);
74+
75+
echo json_encode([
76+
"status" => "error",
77+
"test_case" => "unknown",
78+
"message" => "Invalid case parameter. Valid values: with_route, with_endpoint, 404, computed"
79+
]);
80+
break;
81+
}

ddtrace.sym

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ ddtrace_metric_add_point
1616
ddtrace_emit_asm_event
1717
ddtrace_loaded_by_ssi
1818
ddtrace_ssi_forced_injection_enabled
19+
ddtrace_guess_endpoint_from_url
1920
ddog_remote_config_reader_for_path
2021
ddog_remote_config_read
2122
ddog_remote_config_reader_drop

ext/endpoint_guessing.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ static zend_string* guess_endpoint(const char* orig_url, size_t orig_url_len) {
185185
return smart_str_extract(&result);
186186
}
187187

188+
DDTRACE_PUBLIC zend_string* ddtrace_guess_endpoint_from_url(const char* url, size_t url_len)
189+
{
190+
return guess_endpoint(url, url_len);
191+
}
192+
188193
void ddtrace_maybe_add_guessed_endpoint_tag(ddtrace_root_span_data *span)
189194
{
190195
zval *span_type = &span->property_type;

ext/endpoint_guessing.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

33
#include "ddtrace.h"
4+
#include "ddtrace_export.h"
45

56
void ddtrace_maybe_add_guessed_endpoint_tag(ddtrace_root_span_data *span);
7+
DDTRACE_PUBLIC zend_string* ddtrace_guess_endpoint_from_url(const char* url, size_t url_len);

0 commit comments

Comments
 (0)